version 1.2

# Data Analysis using Pandas and Matplotlib

A good source of data sets: https://data.gov.ie/ <br>
Met Éireann historical data is available at:<br>

https://www.met.ie/climate/available-data/long-term-data-sets

In this lesson we are going to import historical data from Met Éireann, analyse it using pandas, and plot the results using Matplotlib.



In [None]:
# Data analysis library
import pandas as pd
# Plot Library
from matplotlib import pyplot as plt
import numpy as np

## Import the CSV and Print Dataframe

In [None]:
# If the CSV sits in the same directory as the notebook, the bare file name is enough.
# NOTE(review): the absolute path below points into one user's home directory, so it
# only resolves on that machine — switch to a relative path before sharing the notebook.
df = pd.read_csv("/Users/morgan/Documents/Courses/Python/MyModules/DublinAirportClean.csv")

In [None]:
# print() gives the plain-text rendering of the DataFrame
print(df)

In [None]:
# Column labels of the DataFrame
df.columns

In [None]:
# A bare name on the last line of a cell displays the DataFrame as a formatted table
df

## Changing the Index

In [None]:
# Make 'Year' the index permanently (inplace=True modifies df itself).
# The guard keeps this cell safe to re-run: once 'Year' is the index it is no
# longer a column, and a second set_index('Year') would raise a KeyError.
if 'Year' in df.columns:
    df.set_index('Year', inplace=True)
# To undo: df.reset_index(inplace=True)

In [None]:
# Display df again: 'Year' is now the index rather than an ordinary column
df

In [None]:
# .loc selects by label: df.loc[2009] would return the whole row for 2009,
# while adding a column label narrows the result to a single value.
df.loc[2009, 'Mar']

In [None]:
# Output the index, which now holds the year values
df.index


In [None]:
# Using iloc: selection by integer position rather than by label.
# iloc[5] is the sixth row of the frame, returned as a Series with one entry per column.
rain_y = df.iloc[5]
print(rain_y)

In [None]:
# Simple line plot: January and August values against the year index
plt.rcParams['figure.figsize'] = [8, 5]  # width and height of the figure
plt.style.use('Solarize_Light2')         # one of matplotlib's named style sheets
plt.plot(df.index, df['Jan'])
plt.plot(df.index, df['Aug'])

## Create a filter
Our objective is to plot a bar chart showing the rain values for January, August and November at five- or ten-year intervals.

In [None]:
# Build a shorter list of years: every tenth year from 1850 up to, but not including, 2010
nums = range(1850, 2010, 10)
my_list = list(nums)
print(my_list)

In [None]:
# Boolean mask over df's index: True where the index value is one of the years in my_list
filt = df.index.isin(my_list)
print(filt)

In [None]:
# 'Aug' column restricted to the rows selected by the mask
x_values = df.loc[filt, 'Aug']
print(x_values)

## Create a new dataframe with the information that we want


In [None]:
# Create a new DataFrame holding only the masked rows and the Jan, Aug and Nov columns
newDF = df.loc[filt, ['Jan', 'Aug', 'Nov']]
print(newDF)

In [None]:
# Line plot of the three selected months against the index of newDF
plt.plot(newDF.index, newDF['Jan'])
plt.plot(newDF.index, newDF['Aug'])
plt.plot(newDF.index, newDF['Nov'])

In [None]:
# Same data as a bar plot. All three calls use the same x positions and the
# default bar width, so each series is drawn on top of the previous one.
plt.bar(newDF.index, newDF['Jan'])
plt.bar(newDF.index, newDF['Aug'])
plt.bar(newDF.index, newDF['Nov'])





## Hmmm...
Let us try to sort out this mess...

In [None]:
plt.style.use('Solarize_Light2')
plt.rcParams['figure.figsize'] = [14, 6]
width = 2.00
offset = 2.00

# One entry per month: horizontal shift of the bars, source column, extra bar styling.
# Shifting January left and November right by `offset` places the three bars side by side.
bar_specs = [
    (-offset, 'Jan', {'label': 'January Rain'}),
    (0, 'Aug', {'color': 'k', 'label': 'August Rain'}),
    (offset, 'Nov', {'label': 'November Rain'}),
]
for shift, month, style in bar_specs:
    plt.bar(newDF.index + shift, newDF[month], width=width, **style)

plt.title('Rain- Dublin Airport(1860-2010)')
plt.xlabel('Year')
plt.ylabel('Rain mm')
plt.grid(True)
plt.legend()

# Statistical Analysis

## Simple Stats

In [None]:
# Mean of the 'Jan' column over every row of df (printed) ...
print(df['Jan'].mean())

# ... and over the reduced selection in newDF (shown as the cell's result)
newDF['Jan'].mean()

In [None]:
# Mean of every column in the DataFrame
df.mean()

In [None]:
# General summary statistics for the DataFrame's columns
df.describe()

## Grouping Data
Grouping data is a three-phase process: split, apply a function, recombine.

In [None]:
# Load a new set of data for Killarney (1881-1933 according to the file name)
# NOTE(review): the absolute path is specific to one machine — use a relative path
# before sharing the notebook.
kill_df = pd.read_csv("/Users/morgan/Documents/Courses/Python/MyModules/Killarney_1881-1933.csv")
kill_df

In [None]:
# (number of rows, number of columns) of the Killarney data
kill_df.shape

In [None]:
# Split step: one group per distinct value of 'Year'.
# Group by the column label itself rather than a one-element list: with a list key,
# newer pandas releases treat each group key as a 1-tuple, so get_group(1933) is
# deprecated there in favour of get_group((1933,)). A scalar key keeps plain year
# values as the group keys.
year_group = kill_df.groupby('Year')
print(kill_df["Max (F)"])

In [None]:
# All rows that belong to the group whose key is 1933
year_group.get_group(1933)

In [None]:
# The same rows selected with a boolean filter instead of get_group.
# NOTE: this rebinds `filt`, which earlier in the notebook held the year mask for df.
filt = kill_df['Year']== 1933
kill_df.loc[filt]

In [None]:
# Lowest 'Min (C)' value among the rows selected by filt
kill_df.loc[filt,['Min (C)']].min()

In [65]:
# Get the lowest 'Min (C)' value within each year group (apply min per group, then recombine)
year_group['Min (C)'].min()

Year
1881    -7.4
1882   -11.4
1883    -7.3
1884    -6.6
1885    -7.1
1886    -9.3
1887    -7.8
1888    -8.4
1889    -5.3
1890    -8.3
1891    -8.4
1892    -9.7
1893    -6.2
1894   -10.4
1895    -9.7
1896    -7.2
1897    -7.2
1898    -5.8
1899     NaN
1900     NaN
1901     NaN
1902     NaN
1903     NaN
1904     NaN
1905     NaN
1906     NaN
1907     NaN
1908     NaN
1909     NaN
1910     NaN
1911     NaN
1912     NaN
1913     NaN
1914     NaN
1915     NaN
1916     NaN
1917     NaN
1918     NaN
1919     NaN
1920    -6.1
1921    -3.6
1922    -3.9
1923    -5.0
1924    -5.0
1925    -4.4
1926    -5.0
1927    -3.9
1928    -4.4
1929    -5.8
1930    -3.9
1931    -5.6
1932    -3.9
1933    -5.6
Name: Min (C), dtype: float64

In [67]:
# Load another data set, this time from blacksod2.csv
# NOTE(review): the absolute path is specific to one machine — use a relative path
# before sharing the notebook.
black_df = pd.read_csv("/Users/morgan/Documents/Courses/Python/MyModules/blacksod2.csv")

In [68]:
# Display the data loaded from blacksod2.csv
black_df

Unnamed: 0,year,month,ind,rain,gdf,rd,wd
0,1941,1,0,52.3,7.3,21,18
1,1941,2,0,115.0,16.5,25,18
2,1941,3,0,74.9,9.6,17,17
3,1941,4,0,32.2,7.7,14,10
4,1941,5,0,37.5,8.5,14,12
...,...,...,...,...,...,...,...
184,1956,5,0,109.7,27.1,23,13
185,1956,6,0,89.0,17.8,24,15
186,1956,7,0,119.8,21.6,23,15
187,1956,8,0,116.2,16.5,27,22


# Widgets

## SAQ 1
Go to Met Eireann and download a set of data which is suitable for grouping. Import the csv file and group the data.

In [69]:
# The widgets library
import ipywidgets as widgets
from ipywidgets import interactive
from ipywidgets import interact

# Define a function
def f(x):
    """Return x unchanged; it is passed to interact() as the slider callback."""
    return x

In [70]:
 # When you move the slider, the function is called with the new value and its return value is printed.
interact(f, x=10);

interactive(children=(IntSlider(value=10, description='x', max=30, min=-10), Output()), _dom_classes=('widget-…

In [71]:

# Build a slider widget directly; the value passed in becomes the slider's initial value
x=90
widgets.IntSlider(x)

IntSlider(value=90)

In [77]:
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np
plt.style.use('Solarize_Light2')
plt.rcParams['figure.figsize'] = [14, 7]

def f(m, c):
    plt.figure(2)
    x = np.linspace(-10, 10, num=1000)
    plt.plot(x, m * x**2 + 3*x + c)
    plt.ylim(-5, 5)
    plt.show()

interactive_plot = interactive(f, m=(-2.0, 2.0), c=(-3, 3, 0.5))
output = interactive_plot.children[-1]
output.layout.height = '450px'
interactive_plot

interactive(children=(FloatSlider(value=0.0, description='m', max=2.0, min=-2.0), FloatSlider(value=0.0, descr…

# Appendix

## Clunky Graph Solution

In [None]:
# Rebuild the year mask for df here: by this point `filt` has been rebound to the
# Killarney 1933 filter in the Grouping Data section, which does not describe df's rows.
# 'Year' is the index once the "Changing the Index" cell has run, but an ordinary
# column on a freshly loaded frame, so read the years from whichever holds them.
year_values = df['Year'] if 'Year' in df.columns else df.index
filt = np.asarray(year_values.isin(my_list))

# Single-column DataFrames (note the list selector) for each month of interest
jan_rain = df.loc[filt, ['Jan']]
aug_rain = df.loc[filt, ['Aug']]
nov_rain = df.loc[filt, ['Nov']]

In [None]:
# Years of the rows selected by filt (not the 'Jan' values, which jan_rain already holds).
# 'Year' is the index once set_index has run and an ordinary column before that,
# so read the years from whichever holds them.
year_values = df['Year'] if 'Year' in df.columns else df.index.to_series()
year = year_values.loc[filt].to_frame(name='Year')
print(year)

In [None]:
# Convert each single-column selection to a plain list of values
list_jan = jan_rain['Jan'].to_list()
list_aug = aug_rain['Aug'].to_list()
list_nov = nov_rain['Nov'].to_list()

# Years of the selected rows: taken from the 'Year' column when df still has one,
# otherwise from the index of the selection (where set_index('Year') puts them).
year_labels = df.loc[jan_rain.index, 'Year'] if 'Year' in df.columns else jan_rain.index

data= {
      'Year': year_labels.to_list(),
      'Jan_rain': list_jan,
      'Aug_rain': list_aug,
      'Nov_rain': list_nov
      }
# The dict goes in as the data argument on its own: the second positional argument
# of pd.DataFrame is the index, not the data. The 'Year' column is included because
# the plotting cell that follows reads df_new['Year'].
df_new = pd.DataFrame(data)
df_new

In [None]:
# Inspect the November selection
print(nov_rain)

In [None]:
# Set the size of the plot
plt.rcParams['figure.figsize'] = [10, 6]

# January and August bars share the same x positions, so the second call draws over the first
plt.bar(df_new['Year'],df_new.Jan_rain)
plt.bar(df_new['Year'],df_new.Aug_rain)




In [None]:
# Set the size of the plot
plt.rcParams['figure.figsize'] = [14, 6]

# Years we want: every tenth year from 1860 up to, but not including, 2010
nums = range(1860,2010,10)
years = list(nums)

# Use the values in years to filter our dataframe.
# 'Year' is an ordinary column on a freshly loaded frame but becomes the index once
# the "Changing the Index" cell has run (df['Year'] then raises a KeyError), so read
# the years from whichever holds them.
year_values = df['Year'] if 'Year' in df.columns else df.index.to_series()
filt = year_values.isin(years).to_numpy()

year = year_values[filt].to_frame(name='Year')
# Single-column DataFrames for the relevant months
jan_rain = df.loc[filt, ['Jan']]
aug_rain = df.loc[filt, ['Aug']]
nov_rain = df.loc[filt, ['Nov']]

# Convert each column to a list
list_jan = jan_rain['Jan'].to_list()
list_aug = aug_rain['Aug'].to_list()
list_nov = nov_rain['Nov'].to_list()

# Take 'Year' from the rows that were actually selected rather than from `years`,
# so all four columns have the same length even if a requested year is absent from df.
data= {'Year': year['Year'].to_list(),
      'Jan_rain': list_jan,
      'Aug_rain': list_aug,
      'Nov_rain': list_nov
      }
df_new = pd.DataFrame(data)
print(df_new)

# Bar width, also used as the left/right shift of the grouped bars in the next cell
width = 1.85

In [None]:
plt.style.use('Solarize_Light2')

# One entry per month: horizontal shift of the bars, source column, extra bar styling.
# January is shifted one bar width to the left and November one to the right of August.
grouped_bars = [
    (-width, 'Jan_rain', {'label': 'January Rain'}),
    (0, 'Aug_rain', {'color': 'k', 'label': 'August Rain'}),
    (width, 'Nov_rain', {'label': 'November Rain'}),
]
for shift, column, style in grouped_bars:
    plt.bar(df_new['Year'] + shift, df_new[column], width=width, **style)

plt.title('Rain- Dublin Airport(1860-2010)')
plt.xlabel('Year')
plt.ylabel('Rain mm')
plt.grid(True)
plt.legend()