In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
!pip install prophet



In [3]:
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly

In [4]:
dirTrain = 'data/DailyDelhiClimateTrain.csv'
df = pd.read_csv(dirTrain)

df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.0,84.5,0.0,1015.666667
1,2013-01-02,7.4,92.0,2.98,1017.8
2,2013-01-03,7.166667,87.0,4.633333,1018.666667
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667
4,2013-01-05,6.0,86.833333,3.7,1016.5


In [5]:
df.describe()

Unnamed: 0,meantemp,humidity,wind_speed,meanpressure
count,1462.0,1462.0,1462.0,1462.0
mean,25.495521,60.771702,6.802209,1011.104548
std,7.348103,16.769652,4.561602,180.231668
min,6.0,13.428571,0.0,-3.041667
25%,18.857143,50.375,3.475,1001.580357
50%,27.714286,62.625,6.221667,1008.563492
75%,31.305804,72.21875,9.238235,1014.944901
max,38.714286,100.0,42.22,7679.333333


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1462 entries, 0 to 1461
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   date          1462 non-null   object 
 1   meantemp      1462 non-null   float64
 2   humidity      1462 non-null   float64
 3   wind_speed    1462 non-null   float64
 4   meanpressure  1462 non-null   float64
dtypes: float64(4), object(1)
memory usage: 57.2+ KB


In [7]:
#Convert date to datetime format
df['date'] = pd.to_datetime(df['date'], format = "%Y-%m-%d")
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month

df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure,year,month
0,2013-01-01,10.0,84.5,0.0,1015.666667,2013,1
1,2013-01-02,7.4,92.0,2.98,1017.8,2013,1
2,2013-01-03,7.166667,87.0,4.633333,1018.666667,2013,1
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667,2013,1
4,2013-01-05,6.0,86.833333,3.7,1016.5,2013,1


In [8]:
import plotly.express as px

fig = px.line(df, x='date', y = 'meantemp', title='Mean Temperature Trend Over Time')
fig.show()

In [9]:
fig = px.line(df, x='date', y='humidity', title='Mean Humidity Trend Over Time')
fig.show()

In [16]:
grouped = df.groupby('month').mean()
grouped.head()

Unnamed: 0_level_0,date,meantemp,humidity,wind_speed,meanpressure,year
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,2014-07-24 16:30:43.200000000,13.311914,78.6035,4.945358,1018.15241,2014.52
2,2014-08-20 23:21:46.194690304,17.620422,67.959397,6.531042,1015.349232,2014.513274
3,2014-09-14 18:00:00.000000000,22.914103,60.698463,7.696836,1066.531388,2014.5
4,2014-10-15 06:00:00.000000000,29.376614,39.036537,8.783093,1006.975741,2014.5
5,2014-11-14 18:00:00.000000000,33.315889,35.739941,9.022982,1001.975698,2014.5


In [17]:
grouped = grouped.reset_index()
grouped.head()

Unnamed: 0,month,date,meantemp,humidity,wind_speed,meanpressure,year
0,1,2014-07-24 16:30:43.200000000,13.311914,78.6035,4.945358,1018.15241,2014.52
1,2,2014-08-20 23:21:46.194690304,17.620422,67.959397,6.531042,1015.349232,2014.513274
2,3,2014-09-14 18:00:00.000000000,22.914103,60.698463,7.696836,1066.531388,2014.5
3,4,2014-10-15 06:00:00.000000000,29.376614,39.036537,8.783093,1006.975741,2014.5
4,5,2014-11-14 18:00:00.000000000,33.315889,35.739941,9.022982,1001.975698,2014.5


In [20]:
x = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'Septmber', 'October', 'November', 'December']
fig = px.bar(grouped, x=x, y='meantemp', title='Mean Temperature Grouped by Month')
fig.update_layout(xaxis_title='Month', yaxis_title='Mean Temp')
fig.show()

# Timeseries Modeling

In [22]:
data = df.rename(columns = {
    "date": 'ds',
    "meantemp": 'y'
})

data.head()

Unnamed: 0,ds,y,humidity,wind_speed,meanpressure,year,month
0,2013-01-01,10.0,84.5,0.0,1015.666667,2013,1
1,2013-01-02,7.4,92.0,2.98,1017.8,2013,1
2,2013-01-03,7.166667,87.0,4.633333,1018.666667,2013,1
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667,2013,1
4,2013-01-05,6.0,86.833333,3.7,1016.5,2013,1


In [26]:
model = Prophet()
model.fit(data)
forecasts = model.make_future_dataframe(periods=730)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfsuudtkm/pz887l04.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfsuudtkm/aw16njnk.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=50032', 'data', 'file=/tmp/tmpfsuudtkm/pz887l04.json', 'init=/tmp/tmpfsuudtkm/aw16njnk.json', 'output', 'file=/tmp/tmpfsuudtkm/prophet_modeldv289fzj/prophet_model-20240516064356.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
06:43:56 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
06:43:56 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [28]:
preds = model.predict(forecasts)

plot_plotly(model, preds)

In [29]:
testDir = "data/DailyDelhiClimateTest.csv"
test_df = pd.read_csv(testDir)
test_df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2017-01-01,15.913043,85.869565,2.743478,59.0
1,2017-01-02,18.5,77.222222,2.894444,1018.277778
2,2017-01-03,17.111111,81.888889,4.016667,1018.333333
3,2017-01-04,18.7,70.05,4.545,1015.7
4,2017-01-05,18.388889,74.944444,3.3,1014.333333


In [54]:
test_df.shape

(114, 7)

In [30]:
test_df['date'] = pd.to_datetime(test_df['date'], format = "%Y-%m-%d")
test_df['year'] = test_df['date'].dt.year
test_df['month'] = test_df['date'].dt.month

test_df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure,year,month
0,2017-01-01,15.913043,85.869565,2.743478,59.0,2017,1
1,2017-01-02,18.5,77.222222,2.894444,1018.277778,2017,1
2,2017-01-03,17.111111,81.888889,4.016667,1018.333333,2017,1
3,2017-01-04,18.7,70.05,4.545,1015.7,2017,1
4,2017-01-05,18.388889,74.944444,3.3,1014.333333,2017,1


# Use the trained model to forecast weather patterns for test data

In [33]:
test_data = test_df.rename(columns = {
    'date' : 'ds',
    'meantemp' : 'y'
})

test_data.head()

Unnamed: 0,ds,y,humidity,wind_speed,meanpressure,year,month
0,2017-01-01,15.913043,85.869565,2.743478,59.0,2017,1
1,2017-01-02,18.5,77.222222,2.894444,1018.277778,2017,1
2,2017-01-03,17.111111,81.888889,4.016667,1018.333333,2017,1
3,2017-01-04,18.7,70.05,4.545,1015.7,2017,1
4,2017-01-05,18.388889,74.944444,3.3,1014.333333,2017,1


In [34]:
preds = model.predict(test_data)

In [45]:
fig = px.line(preds, x='ds', y='yhat', title="Predicted vs Actual Data")
fig.add_scatter(x=test_df['date'], y=test_df['meantemp'], mode='lines', name='Actual Mean Temperatures')
fig.add_scatter(x=preds['ds'], y=preds['yhat'], mode='lines', name='Predicted Mean Temperatures')

fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.show()