# introduction 

In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
import sys 
from sklearn.impute import SimpleImputer
import random
from plotly.offline import iplot
from plotnine import ggplot, aes, geom_line, geom_bar, geom_point, labs, coord_flip, theme_xkcd, geom_smooth
import json 
import warnings 


# Introduction 
This project aims to explore the topic of climate change and show the corralations between CO2 levels polar ice and rainfall as the key variables, This research also seeks to investigate climate change signals within the context of Ireland's meteorological observations and harnesses data fusion techniques to amalgamate multiple datasets into a structured pandas dataframe.

# polar ice

In [None]:
seaice = pd.read_csv('data/seaice.csv') #reading in the seaice dataset


In [None]:
seaice.info() #checking the info on dataset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26354 entries, 0 to 26353
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Year          26354 non-null  int64  
 1    Month        26354 non-null  int64  
 2    Day          26354 non-null  int64  
 3        Extent   26354 non-null  float64
 4       Missing   26354 non-null  float64
 5    Source Data  26354 non-null  object 
 6   hemisphere    26354 non-null  object 
dtypes: float64(2), int64(3), object(2)
memory usage: 1.4+ MB


In [None]:
southern = seaice[seaice.iloc[:,6]=='south'] #taking only the information from the south hempisphere

In [None]:
years_list = list(southern['Year'].value_counts().index)
years_list = sorted(years_list)

In [None]:

warnings.simplefilter(action='ignore')
def by_months(southern):    
    s_months = 3 + ((2019-1979) * 12) + 5 
    seaice_monthly = pd.DataFrame(columns=['Year', 'Month', 'Extent'], index=range(s_months))
    i = 0
    for year in years_list:
        
        if year == 1978:
            months_list = range(10,13)
        elif year == 2019:
            months_list = range(1,6)
        else:
            months_list = range(1,13)

        for month in months_list:
            subset = southern[southern['Year']==year][southern.iloc[:,1]==month]
            monthly_mean = np.mean(subset.iloc[:,3])
            seaice_monthly.iloc[i] = [year, month, monthly_mean]
            i += 1

    return seaice_monthly

seaice_clean = by_months(southern)

In [None]:
seaice_clean.head()

Unnamed: 0,Year,Month,Extent
0,1978,10,17.699
1,1978,11,15.8964
2,1978,12,10.39525
3,1979,1,5.4022
4,1979,2,3.141071


In [None]:
seaice_clean.tail()

Unnamed: 0,Year,Month,Extent
483,2019,1,3.831323
484,2019,2,2.656214
485,2019,3,3.164161
486,2019,4,5.708633
487,2019,5,8.835516


In [66]:
def anomaliesSIE(seaice):
  #Calcule mean by months from 1981-2010
  df_30y = seaice.query('1981 <= Year <= 2010')
  df_30y = df_30y.reset_index().drop(['index'], axis=1)
  mean_30y = []
  for m in range(1,13):
    mu = seaice[seaice.Month == m]['Extent'].mean()
    mean_30y.append(mu)

  #Compute SIE anomalies
  anomalies_list = []
  for s in range(0, len(seaice)):
    mean_30y_index = seaice.loc[s, 'Month']-1
    #Calcule anomalies
    anomalies_SIE = seaice.loc[s, 'Extent'] - mean_30y[mean_30y_index]
    anomalies_SIE = anomalies_SIE/mean_30y[mean_30y_index]*100
    #Save anomalies
    anomalies_list.append(anomalies_SIE)
  seaice['AnomaliesSIE'] = anomalies_list
  return seaice, mean_30y

df_SIE, mean_30y = anomaliesSIE(seaice_clean)
df_SIE.head()

Unnamed: 0,Year,Month,Extent,AnomaliesSIE
0,1978,10,17.699,-2.349663
1,1978,11,15.8964,0.285111
2,1978,12,10.39525,-0.4029
3,1979,1,5.4022,8.505038
4,1979,2,3.141071,2.580231


# rainfall in Ireland 

In [67]:
Irish_rainfall = pd.read_csv('data/rainfall_ireland.csv') #loading in dataset 

In [68]:
Irish_rainfall.head() # looking at the start of the dataset

Unnamed: 0,Year,Month,Rainfall
0,1711,1,16.4
1,1711,2,73.1
2,1711,3,121.2
3,1711,4,85.2
4,1711,5,66.6


In [69]:
Irish_rainfall.tail() # looking at the end of dataset

Unnamed: 0,Year,Month,Rainfall
3667,2016,8,79.1
3668,2016,9,108.1
3669,2016,10,51.8
3670,2016,11,58.0
3671,2016,12,92.1


In [70]:
Irish_rainfall.info() #seeing information on the dataset 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3672 entries, 0 to 3671
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Year      3672 non-null   int64  
 1   Month     3672 non-null   int64  
 2   Rainfall  3672 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 86.2 KB


In [73]:
mean_years = []
std_dict = {}
for month in range(1, 13):
    mu = Irish_rainfall[Irish_rainfall.Month == month]['Rainfall'].mean()
    std = Irish_rainfall[Irish_rainfall.Month == month]['Rainfall'].std()
    mean_years.append(mu)
    std_dict[mean] = std