![](https://derrickmartins.files.wordpress.com/2015/11/time-series-analysis.png)

# Time series analysis using Pandas

Dataset: [Delhi weather data on Kaggle](https://www.kaggle.com/mahirkukreja/delhi-weather-data)

In [None]:
import calendar
import pandas as pd
import matplotlib.pyplot as plt
from random import random
%matplotlib inline

In [None]:
# load the dataset, parsing 'datetime_utc' as timestamps and using it as the index
df = pd.read_csv(
    "data/dataset.csv",
    index_col='datetime_utc',
    parse_dates=['datetime_utc'],
)

### Clean data
![](https://y1vwcp5chj-flywheel.netdna-ssl.com/wp-content/uploads/2016/10/blog-meme.jpg)

In [None]:
# show the column labels exactly as they were read from the CSV
df.columns

In [None]:
# summary statistics per column, to spot implausible values before cleaning
df.describe()

In [None]:
# strip the ' _' marker from every column label
new_cols = df.columns.str.replace(' _', '', regex=False).tolist()
df.columns = new_cols

In [None]:
# drop the 'precipm' column (noted as containing only NaNs)
df = df.drop(columns='precipm')

In [None]:
# take care of wrong readings
def _replace_with_valid_mean(series, is_wrong):
    """Return *series* with the flagged readings replaced by the mean of the rest.

    Args:
        series: column of readings.
        is_wrong: boolean Series aligned with *series*; True marks a reading
            to replace.

    Returns:
        A new Series in which flagged entries hold the mean of the
        non-flagged entries (NaNs are skipped when averaging).
    """
    # Average only the readings that passed the check, so the wrong values
    # do not contaminate their own replacement; computed once, not per row.
    return series.mask(is_wrong, series[~is_wrong].mean())


pressure = df['pressurem']
# Valid pressure lies strictly between 800 and 1500. NaN fails both
# comparisons, so missing pressure values are filled with the mean as well.
df['pressurem'] = _replace_with_valid_mean(
    pressure, ~((pressure > 800) & (pressure < 1500))
)

# Remaining columns only have an upper bound; NaNs there are left untouched.
for col_name, upper_bound in [('hum', 120), ('vism', 10), ('wspdm', 300), ('tempm', 50)]:
    df[col_name] = _replace_with_valid_mean(df[col_name], df[col_name] > upper_bound)

In [None]:
# expose components of the timestamp index as plain columns for grouping/querying
for part in ('hour', 'day', 'month', 'year'):
    df[part] = getattr(df.index, part)

### Quantitative plots over years/months/hours

In [None]:
# columns to plot, four per line to mirror the 4x4 grid of charts
cols = [
    'dewptm', 'fog', 'hail', 'heatindexm',
    'hum', 'pressurem', 'rain', 'snow',
    'tempm', 'thunder', 'tornado', 'vism',
    'wdird', 'wgustm', 'windchillm', 'wspdm',
]

In [None]:
# one panel per column: mean value for each calendar month
fig, axes = plt.subplots(4, 4, figsize=(16, 16))

by_month = df.groupby('month')
# axes.flat walks the grid row by row, matching the order of `cols`
for ax, col in zip(axes.flat, cols):
    monthly_mean = by_month[col].mean()
    ax.plot(monthly_mean)
    ax.set_title(col)

fig.subplots_adjust(wspace=0.3, hspace=0.3)

### Weather condition trends per month
![](https://i.pinimg.com/originals/19/a5/66/19a566d915debc41d7e303666b7ea10f.jpg)

In [None]:
# number of rows for every (month, weather condition) pair
month_wise_conds = (
    df.groupby(['month', 'conds'])['month']
    .count()
)

In [None]:
# pivot the 'conds' index level into columns: one row per month, one column per condition
month_wise_conds_df = month_wise_conds.unstack(level='conds')

In [None]:
# display the month-by-condition count table
month_wise_conds_df

In [None]:
# assign every weather condition a random opaque RGBA colour
colordic = {
    cond: (random(), random(), random(), 1)
    for cond in month_wise_conds_df.columns.values
}

In [None]:
# one pie chart per month on a 4x3 grid
fig, axes = plt.subplots(4, 3, figsize=(15, 15))

for position, (month_no, counts) in enumerate(month_wise_conds_df.iterrows()):
    # keep only conditions above 3% of the month's total count
    threshold = counts.sum() * .03
    major = counts[counts > threshold]
    # look up each remaining condition's colour so it is the same in every chart
    slice_colors = [colordic[name] for name in major.index]

    ax = axes.flat[position]
    ax.pie(major, labels=major.index, colors=slice_colors)
    ax.set_title(calendar.month_name[month_no])

fig.subplots_adjust(wspace=1.0, hspace=0.2)

In [None]:
# share of each weather condition among rows with month 8, day 1, hour 15 (all years in the data)
on_this_day = df[(df['month'] == 8) & (df['day'] == 1) & (df['hour'] == 15)]
on_this_day['conds'].value_counts().plot.pie(figsize=(8, 8), autopct='%1.0f%%')

In [None]:
# mean of 'vism' per month, with one column for each year divisible by 5
every_fifth_year = df[df['year'] % 5 == 0]
pv = every_fifth_year.pivot_table(index=['month'], columns=['year'],
                                  values='vism', aggfunc='mean')

In [None]:
# one line per year, months along the x-axis
pv.plot(kind='line')

## Depressing, but true...
![](https://s3.scoopwhoop.com/anj/sw/a1153a33-b90b-4fc2-a569-745a78b950e8.jpg)