In [1]:
# Import the plotting and data-handling libraries used throughout the notebook

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the dataset from the CSV file into a DataFrame.

df = pd.read_csv(filepath_or_buffer='eda_manipulate_date_strings_with_python.csv')

# Show the first five rows as a quick check of what was loaded.
df.head(n=5)

In [None]:
# Replace the 'date' column with its datetime-converted values so that the
# `.dt` accessor can be used on it in the following cells.
df["date"] = pd.to_datetime(arg=df["date"])

In [None]:
# Derive four text columns from 'date': week, month, quarter and year labels.

# %V is the ISO 8601 week number, so it must be paired with the ISO
# week-based year (%G), not the calendar year (%Y). With %Y, a date such as
# 2018-12-31 (which belongs to ISO week 1 of 2019) would be labelled
# '2018-W01' and silently merged with the first week of January 2018.
df['week'] = df['date'].dt.strftime('%G-W%V')
# Calendar year and month, e.g. '2018-03'.
df['month'] = df['date'].dt.strftime('%Y-%m')
# Calendar year and quarter, e.g. '2018-Q1' (%q is a Period-only directive).
df['quarter'] = df['date'].dt.to_period('Q').dt.strftime('%Y-Q%q')
# Calendar year as text, e.g. '2018'; later cells compare it against strings.
df['year'] = df['date'].dt.strftime('%Y')

df.head(10)

In [None]:
# Plot the number of weekly lightning strikes in 2018

# Keep only the rows whose 'year' label is '2018' and total them per week.
# `numeric_only=True` restricts the sum to numeric columns: summing the
# datetime 'date' column raises TypeError on recent pandas versions, and the
# text label columns have no meaningful sum.
df_by_week_2018 = (
    df[df['year'] == '2018']
    .groupby(['week'])
    .sum(numeric_only=True)
    .reset_index()
)

# Bar graph of the weekly strike totals in 2018.
plt.bar(x=df_by_week_2018['week'], height=df_by_week_2018['number_of_strikes'])
plt.xlabel("Week Number")
plt.ylabel("Number of Lightning Strikes")
plt.title("Number of Lightning Strikes per Week(2018)")
plt.show()

In [None]:
# Redraw the weekly 2018 bar graph on a wider figure (20 x 5 inches) so the
# individual week labels have room along the x-axis.
plt.figure(figsize=(20, 5))
plt.bar(x=df_by_week_2018['week'], height=df_by_week_2018['number_of_strikes'])
plt.xlabel("Week Number")
plt.ylabel("Number of Lightning Strikes")
plt.title("Number of Lightning Strikes per Week(2018)")
# Rotate and shrink the x tick labels to reduce overlap between them.
plt.xticks(rotation=45, fontsize=8)
plt.show()

In [None]:
# Preview the strike counts expressed in millions.
# NOTE: despite its name, `df_by_quarter` is an ungrouped Series at this
# point (one value per row of `df`); nothing is plotted in this cell.

df_by_quarter = df['number_of_strikes'] / 1000000
df_by_quarter.head(n=5)

In [None]:
# Aggregate by quarter and build a human-readable label for each total.

# Total the numeric columns for every quarter label in the data.
# `numeric_only=True` keeps the datetime 'date' column (which cannot be
# summed on recent pandas versions) and the text label columns out of the sum.
df_by_quarter = df.groupby(['quarter']).sum(numeric_only=True).reset_index()

# Text version of each total in millions, rounded to one decimal place and
# suffixed with 'M' (for example 1234567 becomes '1.2M').
df_by_quarter['number_of_strikes_formatted'] = (
    df_by_quarter['number_of_strikes'].div(1000000).round(1).astype(str) + 'M'
)

df_by_quarter.head()

In [None]:
#add labels

def addlabels(x, y, labels):
    '''
    Draw a text label just above each bar of the current bar graph.

    The i-th label is placed at horizontal position ``i`` and vertical
    position equal to the i-th value of ``y``. This assumes the bars sit at
    positions 0..n-1 on the x-axis, as they do when the bar graph is drawn
    from categorical (string) x values.

    Values are taken by position rather than by index label, so pandas
    Series whose index is not 0..n-1 (for example after filtering or
    sorting) are labelled correctly.

    Args:
        x: Sequence of bar categories; only its length is used.
        y: Sequence of bar heights, one per bar.
        labels: Sequence of label texts, one per bar.

    Raises:
        ValueError: If ``y`` or ``labels`` has fewer elements than ``x``.
    '''
    bar_count = len(x)
    if len(y) < bar_count or len(labels) < bar_count:
        raise ValueError("y and labels must each provide a value for every bar in x")
    # zip() walks the values in order, independent of any pandas index labels.
    for position, height, label in zip(range(bar_count), y, labels):
        plt.text(position, height, label, ha='center', va='bottom')

In [None]:
# Plot the quarterly strike totals as a bar graph, with the formatted
# total written above each bar.
plt.figure(figsize=(15, 5))
plt.bar(x=df_by_quarter['quarter'], height=df_by_quarter['number_of_strikes'])
addlabels(
    df_by_quarter['quarter'],
    df_by_quarter['number_of_strikes'],
    df_by_quarter['number_of_strikes_formatted'],
)
plt.xlabel('Quarter')
plt.ylabel('Number of lightning strikes')
plt.title('Number of lightning strikes per quarter (2016-2018)')
plt.show()

In [None]:
# Prepare the data for a grouped bar chart by splitting each quarter label
# into its two parts.
quarter_labels = df_by_quarter['quarter']
# Last two characters of the label, e.g. 'Q1' from '2016-Q1'.
df_by_quarter['quarter_number'] = quarter_labels.str[-2:]
# First four characters of the label, e.g. '2016' from '2016-Q1'.
df_by_quarter['year'] = quarter_labels.str[:4]
df_by_quarter.head()

In [None]:
# Grouped bar chart: one cluster of bars per quarter number, one bar per year.
plt.figure(figsize=(15, 5))
p = sns.barplot(
    data=df_by_quarter,
    x='quarter_number',
    y='number_of_strikes',
    hue='year',
)
for b in p.patches:
    height = b.get_height()
    # Only label real bars. Patches with zero or NaN height would otherwise
    # get stray '0.0M' / 'nanM' labels.
    # NOTE(review): some seaborn versions appear to add zero-size placeholder
    # patches for the legend - confirm against the installed version.
    if not height > 0:
        continue
    # Anchor the label at the bar's horizontal centre, 1.2e6 data units above
    # its top, then shift it 12 points down via the 'offset points' xytext.
    p.annotate(
        str(round(height / 1000000, 1)) + 'M',
        (b.get_x() + b.get_width() / 2., height + 1.2e6),
        ha='center', va='bottom',
        xytext=(0, -12),
        textcoords='offset points',
    )
plt.xlabel("Quarter")
plt.ylabel("Number of lightning strikes")
plt.title("Number of lightning strikes per quarter (2016-2018)")
plt.show()