In [1]:
#import libraries
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import os
warnings.filterwarnings("ignore") # Disable deprecation warnings 

In [3]:
# Load the cleaned temperature-change dataset from the project folder
path = r'C:\Users\nrsmi\OneDrive\Documents\CareerFoundry\09-2024 Advanced Analytics & Dashboard\Python\10-2024 Analyzing Global Temperature Changes'
source_csv = os.path.join(path, '02_Data', 'Prepared Data', 'df_TC_climate_region_CLEAN.csv')

# index_col=False: do not use the first CSV column as the row index
df = pd.read_csv(source_csv, index_col=False)

In [7]:
# Preview the first five rows of the loaded data
df.head(n=5)

Unnamed: 0,Country,Months,Element,Year,Temperature,Region,Climate
0,Afghanistan,January,Temperature Change,1961,0.777,Asia,Continental
1,Afghanistan,February,Temperature Change,1961,-1.743,Asia,Continental
2,Afghanistan,March,Temperature Change,1961,0.516,Asia,Continental
3,Afghanistan,April,Temperature Change,1961,-1.709,Asia,Continental
4,Afghanistan,May,Temperature Change,1961,1.412,Asia,Continental


In [5]:
# Mean of the 'Temperature' column per country (one row per country)
country_means = df.groupby('Country')['Temperature'].mean()

# Flatten the grouped result into a two-column frame and give the
# value column a descriptive name
df2 = country_means.reset_index().rename(columns={'Temperature': 'Average Temperature'})

# Preview the result
df2.head()

Unnamed: 0,Country,Average Temperature
0,Afghanistan,0.436037
1,Albania,0.485035
2,Algeria,0.716062
3,American Samoa,0.432105
4,Andorra,0.699853


In [9]:
# Export the per-country averages.
# Note: with to_csv's default index=True the row index is written as a
# leading, unnamed column in the file.
avgtemp_csv = os.path.join(path, '02_Data', 'Prepared Data', 'df_TC_climate_region_CLEAN_avgtemp.csv')
df2.to_csv(avgtemp_csv)

In [27]:
# Label each country with a 'Temperature Group' derived from its
# 'Average Temperature'.
#
# The two cut-offs are named once so the band edges cannot drift apart.
COLD_MAX = 0.43   # averages below this are 'Cold Fluctuating'
HOT_MIN = 0.98    # averages at or above this are 'Hot Fluctuating'

avg_temp = df2['Average Temperature']

# np.select takes the FIRST matching rule per row, so the bands are
# mutually exclusive and do not depend on statement order:
#   avg <  0.43          -> 'Cold Fluctuating'
#   0.43 <= avg < 0.98   -> 'Cold and Hot Fluctuating'
#   avg >= 0.98          -> 'Hot Fluctuating'
# A value of exactly 0.43 belongs to the middle band. A missing (NaN)
# average matches no rule and gets the empty-string default.
df2['Temperature Group'] = np.select(
    [avg_temp < COLD_MAX, avg_temp < HOT_MIN, avg_temp >= HOT_MIN],
    ['Cold Fluctuating', 'Cold and Hot Fluctuating', 'Hot Fluctuating'],
    default="",
)

In [29]:
# Preview the first five rows, now including the 'Temperature Group' label
df2.head(n=5)

Unnamed: 0,Country,Average Temperature,Temperature Group
0,Afghanistan,0.436037,Cold and Hot Fluctuating
1,Albania,0.485035,Cold and Hot Fluctuating
2,Algeria,0.716062,Cold and Hot Fluctuating
3,American Samoa,0.432105,Cold and Hot Fluctuating
4,Andorra,0.699853,Cold and Hot Fluctuating


In [25]:
# Count missing values in every column of the per-country frame
df2.isna().sum()

Country                0
Average Temperature    0
Temperature Group      0
dtype: int64

In [31]:
# Export the per-country averages together with their group label.
# Note: with to_csv's default index=True the row index is written as a
# leading, unnamed column in the file.
temperature_group_csv = os.path.join(path, '02_Data', 'Prepared Data', 'df_TC_climate_region_CLEAN_TemperatureGroup2.csv')
df2.to_csv(temperature_group_csv)

In [39]:
# Mean of the 'Temperature' column per climate type, as a flat frame
# with the value column renamed for readability
df3 = (
    df.groupby('Climate')['Temperature']
    .mean()
    .reset_index()
    .rename(columns={'Temperature': 'Average Temperature Change'})
)

# Preview the result
df3.head()

Unnamed: 0,Climate,Average Temperature Change
0,Arid,0.507842
1,Continental,0.534076
2,MISC,0.33106
3,Polar,0.362449
4,Temperate,0.689605


In [41]:
# Export the per-climate averages.
# Note: with to_csv's default index=True the row index is written as a
# leading, unnamed column in the file.
climate_csv = os.path.join(path, '02_Data', 'Prepared Data', 'AvgTemp_by_Climate.csv')
df3.to_csv(climate_csv)

In [37]:
# Mean of the 'Temperature' column per region, as a flat frame
# with the value column renamed for readability
df4 = (
    df.groupby('Region')['Temperature']
    .mean()
    .reset_index()
    .rename(columns={'Temperature': 'Average Temperature Change'})
)

# Preview the result
df4.head()

Unnamed: 0,Region,Average Temperature Change
0,Africa,0.499506
1,Americas,0.433847
2,Asia,0.458953
3,Europe,0.688844
4,Non-country,0.334231


In [43]:
# Export the per-region averages.
# Note: with to_csv's default index=True the row index is written as a
# leading, unnamed column in the file.
region_csv = os.path.join(path, '02_Data', 'Prepared Data', 'AvgTemp_by_Region.csv')
df4.to_csv(region_csv)

In [47]:
# Build a 'Date' column from 'Year' and 'Months', pinned to day 01
# of each month (e.g. '1961-January-01').
date_text = df['Year'].astype(str) + '-' + df['Months'] + '-01'

# assign() returns a new frame, so the original df is left untouched.
# errors='coerce' turns any string that cannot be parsed into NaT
# instead of raising.
df_5 = df.assign(Date=pd.to_datetime(date_text, errors='coerce'))

# Preview the result
df_5.head()

Unnamed: 0,Country,Months,Element,Year,Temperature,Region,Climate,Date
0,Afghanistan,January,Temperature Change,1961,0.777,Asia,Continental,1961-01-01
1,Afghanistan,February,Temperature Change,1961,-1.743,Asia,Continental,1961-02-01
2,Afghanistan,March,Temperature Change,1961,0.516,Asia,Continental,1961-03-01
3,Afghanistan,April,Temperature Change,1961,-1.709,Asia,Continental,1961-04-01
4,Afghanistan,May,Temperature Change,1961,1.412,Asia,Continental,1961-05-01


In [51]:
# Export the full dataset including the new 'Date' column.
# Note: with to_csv's default index=True the row index is written as a
# leading, unnamed column in the file.
dated_csv = os.path.join(path, '02_Data', 'Prepared Data', 'df_TC_climate_region_CLEAN_Date.csv')
df_5.to_csv(dated_csv)