# Scale your pandas workflows by changing one line of code #
- https://github.com/modin-project/modin
    
## With Pandas ##
```
import pandas as pd
```
## With modin pandas ##
```
import modin.pandas as pd
```

In [None]:
# Record which Python interpreter version this notebook runs against.
!python --version

In [None]:
# Show the installed pandas package metadata (version, install location).
!pip show pandas

In [None]:
# Show the installed modin package metadata (version, install location).
!pip show modin

In [None]:
# List the wildfire data directory, including hidden files, with
# human-readable sizes.
!ls  -lha ../wildfire/wildfire-data 

In [None]:
from datetime import datetime
import pandas as pdo 
import modin.pandas as pd

In [None]:
from modin.config import Engine

In [None]:
# Select Dask as the execution engine Modin uses for its dataframes.
Engine.put("dask")

In [None]:
from distributed import Client
# Create a distributed client with default settings.
# NOTE(review): with no arguments this presumably starts a local cluster on
# this machine - confirm worker count/memory suit the dataset.
client = Client()

In [None]:
# List the current working directory; the CSV paths used below are relative,
# so the input files are presumably expected here.
!ls -lah

In [None]:
# Path of the uncompressed CSV loaded by both read_csv benchmarks below.
source = "mix_fire.csv"

## Reading CSV (uncompressed)

In [None]:
# Time how long stock pandas (imported as pdo) takes to load the
# uncompressed CSV.
t0 = datetime.now()
df_pd = pdo.read_csv(source, low_memory=False)
end = datetime.now()
# total_seconds() keeps fractions of a second (and any whole days);
# timedelta.seconds is only the truncated seconds component, which hides
# sub-second differences between the two libraries.
f'Total time {(end - t0).total_seconds():.3f} (s)'

In [None]:
# (rows, columns) of the frame loaded with stock pandas.
df_pd.shape

In [None]:
# Time how long Modin (imported as pd) takes to load the uncompressed CSV.
t0 = datetime.now()
df = pd.read_csv(source)
end = datetime.now()
# total_seconds() keeps fractions of a second (and any whole days);
# timedelta.seconds is only the truncated seconds component, which hides
# sub-second differences between the two libraries.
f'Total time {(end - t0).total_seconds():.3f} (s)'

In [None]:
# (rows, columns) of the frame loaded with Modin.
df.shape

## Reading gzip-compressed CSV

In [None]:
# Path of the gzip-compressed CSV loaded by both read_csv benchmarks below.
source_gz = 'mix_fire.csv.gz'

In [None]:
# Time how long stock pandas (imported as pdo) takes to load the
# gzip-compressed CSV.
t0 = datetime.now()
df_pd_gz = pdo.read_csv(source_gz, low_memory=False)
end = datetime.now()
# total_seconds() keeps fractions of a second (and any whole days);
# timedelta.seconds is only the truncated seconds component, which hides
# sub-second differences between the two libraries.
f'Total time {(end - t0).total_seconds():.3f} (s)'

In [None]:
# Time how long Modin (imported as pd) takes to load the gzip-compressed CSV.
t0 = datetime.now()
df_mo_gz = pd.read_csv(source_gz)
end = datetime.now()
# total_seconds() keeps fractions of a second (and any whole days);
# timedelta.seconds is only the truncated seconds component, which hides
# sub-second differences between the two libraries.
f'Total time {(end - t0).total_seconds():.3f} (s)'

## Checking dataframes

In [None]:
# Both loaders should report the same (rows, columns); print the pandas
# shape first and the Modin shape second, one per line.
print(df_pd_gz.shape, df_mo_gz.shape, sep="\n")

In [None]:
# Number of decimal places latitude/longitude are rounded to before grouping.
PRECISION = 1

## Using Pandas

In [None]:
# Aggregate the hotspot records loaded with stock pandas (df_pd_gz) and time
# the whole pipeline.
t_start = datetime.now()
## ----
# Copy the column subset so the assignments below write to an independent
# frame instead of a slice of df_pd_gz (avoids SettingWithCopyWarning).
df = df_pd_gz[['latitude', 'longitude', 'acq_date', 'confidence']].copy()
# Round coordinates to PRECISION decimals so records are grouped per grid cell.
df['latitude'] = df['latitude'].round(PRECISION)
df['longitude'] = df['longitude'].round(PRECISION)
# df_pd_gz is a stock pandas frame, so parse the dates with pandas (pdo)
# rather than Modin (pd) to keep this benchmark free of Modin calls.
df['acq_date'] = pdo.to_datetime(df['acq_date'])
df['year'] = df['acq_date'].dt.year
df['month'] = df['acq_date'].dt.month
# One row per (latitude, longitude, year, month) with its record count.
total_fires = df.groupby(['latitude', 'longitude', 'year', 'month']).size().reset_index()
total_fires.columns = ['latitude', 'longitude', 'year', 'month', 'fire_count']
# NOTE(review): count() gives the number of monthly rows per cell and year,
# not the summed fire_count - confirm this is the intended yearly metric.
yearly_fires = total_fires.groupby(['longitude', 'latitude', 'year', ]).count().reset_index()
# Sums every remaining column per cell; only fire_count is used by the map.
global_fires = total_fires.groupby(['latitude', 'longitude']).sum().reset_index()
# ----
t_end = datetime.now()
# total_seconds() keeps sub-second resolution; timedelta.seconds truncates.
f'Total time {(t_end - t_start).total_seconds():.3f} (s)'

In [None]:
# Print the (rows, columns) of the per-year aggregate, then display it.
print(yearly_fires.shape)
yearly_fires

In [None]:
# Print the (rows, columns) of the per-cell aggregate, then display it.
print(global_fires.shape)
global_fires

In [None]:
import os

import numpy as np
import plotly.express as px
import plotly.figure_factory as ff

# Prefer a token supplied through the MAPBOX_TOKEN environment variable so
# credentials do not have to live in the notebook.
# NOTE(security): the literal below is a Mapbox access token committed to
# source; it is kept only as a fallback so the notebook still runs as before.
# Rotate it and remove the fallback once the environment variable is in use.
MAPBOX_TOKEN = os.environ.get('MAPBOX_TOKEN') or (
    'pk.eyJ1IjoiZm9kZ2Fib3JtYXRoIiwiYSI6ImNrZmY3Nzc2bjBiemkyeG8zdGNzcXgzMGIifQ.J0dZhMiuZTPVexL8nrpS6Q'
)
px.set_mapbox_access_token(MAPBOX_TOKEN)

In [None]:
# Draw a hexbin map of the per-cell fire counts and time figure creation
# plus rendering.
t_start = datetime.now()
## ----
fig = ff.create_hexbin_mapbox(
    data_frame=global_fires, lat='latitude', lon='longitude',
    nx_hexagon=100, opacity=0.9, labels={"color": "Hotspot records"},
    # Each hexagon is coloured by the sum of fire_count of the cells inside it.
    color='fire_count', agg_func=np.sum, color_continuous_scale="Reds"
)
fig.show()
# ----
t_end = datetime.now()
# total_seconds() keeps sub-second resolution; timedelta.seconds truncates.
f'Total time {(t_end - t_start).total_seconds():.3f} (s)'

## Using Modin Pandas

In [None]:
# Aggregate the hotspot records loaded with Modin (df_mo_gz) and time the
# whole pipeline.
t_start = datetime.now()
## ----
df = df_mo_gz[['latitude', 'longitude', 'acq_date', 'confidence']]
# Round coordinates to PRECISION decimals so records are grouped per grid cell.
# Bracket assignment is used because attribute assignment only works for
# columns that already exist and is easy to mistake for setting an attribute.
df['latitude'] = df['latitude'].round(PRECISION)
df['longitude'] = df['longitude'].round(PRECISION)
df['acq_date'] = pd.to_datetime(df['acq_date'])
df['year'] = df['acq_date'].dt.year
df['month'] = df['acq_date'].dt.month
# One row per (latitude, longitude, year, month) with its record count.
total_fires = df.groupby(['latitude', 'longitude', 'year', 'month']).size().reset_index()
total_fires.columns = ['latitude', 'longitude', 'year', 'month', 'fire_count']
# NOTE(review): count() gives the number of monthly rows per cell and year,
# not the summed fire_count - confirm this is the intended yearly metric.
yearly_fires = total_fires.groupby(['longitude', 'latitude', 'year', ]).count().reset_index()
# Sums every remaining column per cell; only fire_count is used by the map.
global_fires = total_fires.groupby(['latitude', 'longitude']).sum().reset_index()
# ----
t_end = datetime.now()
# total_seconds() keeps sub-second resolution; timedelta.seconds truncates.
f'Total time {(t_end - t_start).total_seconds():.3f} (s)'

In [None]:
# Print the (rows, columns) of the per-cell aggregate, then display it.
print(global_fires.shape)
global_fires

In [None]:
# Draw a hexbin map of the per-cell fire counts and time figure creation
# plus rendering.
t_start = datetime.now()
## ----
# Columns are selected by position here: 0 = latitude, 1 = longitude and
# 4 = fire_count in the column order assigned to total_fires.
# NOTE(review): presumably the Modin frame loses its column labels when
# Plotly converts it, which is why names are not used - TODO confirm.
fig = ff.create_hexbin_mapbox(
    data_frame=global_fires, lat=0, lon=1,
    nx_hexagon=100, opacity=0.9, labels={"color": "Hotspot records"},
    color=4, agg_func=np.sum, color_continuous_scale="Reds"
)
fig.show()
# ----
t_end = datetime.now()
# total_seconds() keeps sub-second resolution; timedelta.seconds truncates.
f'Total time {(t_end - t_start).total_seconds():.3f} (s)'

In [None]:
# Inspect the column labels of the per-cell aggregate.
global_fires.columns

In [None]:
# Render the per-cell fire counts as a density heat map over the whole world.
# Columns are addressed by position: 0 for lat, 1 for lon and 4 for the
# intensity value z.
# NOTE(review): the Stamen-hosted tile styles may no longer be served -
# confirm "stamen-terrain" still renders.
fig = px.density_mapbox(
    global_fires,
    lat=0,
    lon=1,
    z=4,
    radius=10,
    center={"lat": 0, "lon": 180},
    zoom=0,
    mapbox_style="stamen-terrain",
)
fig.show()