In [None]:
# In this checkpoint, we work on the 'Climate change in Africa' dataset provided by the U.S. Global Change Research Program.

# Dataset description: this dataset contains historical daily minimum, maximum and average temperatures for 5 African countries (Egypt, Tunisia, Cameroon, Senegal, Angola) between 1980 and 2023.

# ➡️ Dataset link

# https://i.imgur.com/w2czdso.jpg


# Instructions

# Load the dataset into a data frame using Python.
# Clean the data as needed.
# Plot a line chart to show the average temperature fluctuations in Tunisia and Cameroon. Interpret the results.
# Zoom in to only include data between 1980 and 2005, try to customize the axes labels.
# Create Histograms to show temperature distribution in Senegal between [1980,2000] and [2000,2023] (in the same figure). Describe the obtained results.
# Select the best chart to show the Average temperature per country.
# Make your own questions about the dataset and try to answer them using the appropriate visuals.

In [None]:
import pandas as pd
# Load the raw CSV into a DataFrame.
# NOTE(review): this is an absolute path (looks like a Colab working directory — confirm);
# the notebook only runs where the file exists at exactly this location.
African_climate = pd.read_csv('/content/Africa_climate_change.csv')

In [None]:
# Preview the first rows to check the column layout and spot missing values early.
African_climate.head()

Unnamed: 0,DATE,PRCP,TAVG,TMAX,TMIN,COUNTRY
0,19800101 000000,,54.0,61.0,43.0,Tunisia
1,19800101 000000,,49.0,55.0,41.0,Tunisia
2,19800101 000000,0.0,72.0,86.0,59.0,Cameroon
3,19800101 000000,,50.0,55.0,43.0,Tunisia
4,19800101 000000,,75.0,91.0,,Cameroon


In [None]:
# Summarize column dtypes and non-null counts (DATE is still a plain object column at this point).
African_climate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 464815 entries, 0 to 464814
Data columns (total 6 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   DATE     464815 non-null  object 
 1   PRCP     177575 non-null  float64
 2   TAVG     458439 non-null  float64
 3   TMAX     363901 non-null  float64
 4   TMIN     332757 non-null  float64
 5   COUNTRY  464815 non-null  object 
dtypes: float64(4), object(2)
memory usage: 21.3+ MB


In [None]:
# Summary statistics for the numeric columns.
# NOTE(review): the reported TAVG minimum (-49.0) looks implausible next to the other
# quantiles — consider checking for bad readings before imputing or plotting.
African_climate.describe()

Unnamed: 0,PRCP,TAVG,TMAX,TMIN
count,177575.0,458439.0,363901.0,332757.0
mean,0.120941,77.029838,88.713969,65.548262
std,0.486208,11.523634,13.042631,11.536547
min,0.0,-49.0,41.0,12.0
25%,0.0,70.0,81.0,58.0
50%,0.0,80.0,90.0,68.0
75%,0.01,85.0,99.0,74.0
max,19.69,110.0,123.0,97.0


In [None]:
# Count the missing values in every column
African_climate.isna().sum()

DATE            0
PRCP       287240
TAVG         6376
TMAX       100914
TMIN       132058
COUNTRY         0
dtype: int64

In [None]:
# Drop rows that are exact copies of an earlier row (first occurrence is kept).
# NOTE(review): the preview shows several rows per date and country and no station
# column, so identical readings from different stations would also be dropped — confirm
# this is intended.
African_climate = African_climate.drop_duplicates()

In [None]:
# Convert data types
# DATE is stored as text such as '19800101 000000'. Passing the format explicitly keeps
# parsing unambiguous and avoids relying on (slow) per-element format guessing for a
# column of several hundred thousand rows.
African_climate['DATE'] = pd.to_datetime(African_climate['DATE'], format='%Y%m%d %H%M%S')

In [None]:
# Fill missing values.
# The previous form, `df[col].fillna(..., inplace=True)`, is chained in-place assignment:
# it acts on a temporary Series, emits a FutureWarning in pandas 2.x and leaves the frame
# unchanged under Copy-on-Write. Filling through the DataFrame and rebinding the name
# works on every pandas version.
fill_values = {
    'PRCP': 0,  # no precipitation record is treated as no rain
    # Temperature gaps are filled with the column mean over the whole dataset.
    # NOTE(review): this is a global mean across all countries; a per-country mean may
    # distort country-level comparisons less — confirm which is wanted.
    'TAVG': African_climate['TAVG'].mean(),
    'TMAX': African_climate['TMAX'].mean(),
    'TMIN': African_climate['TMIN'].mean(),
}
African_climate = African_climate.fillna(value=fill_values)

In [None]:
# Persist the cleaned frame to disk, leaving the row index out of the file
African_climate.to_csv(path_or_buf='cleaned_dataset.csv', index=False)

In [None]:
import plotly.express as px

In [None]:
# Plot the line chart
# Plotting every raw row as one trace joins points from all five countries (and several
# readings per day) into a single zig-zag line. Restrict the data to the two countries
# being compared, reduce it to one mean value per country and day, and draw one
# coloured line per country.
countries_to_compare = ['Tunisia', 'Cameroon']
daily_avg_temp = (
    African_climate[African_climate['COUNTRY'].isin(countries_to_compare)]
    .groupby(['COUNTRY', 'DATE'], as_index=False)['TAVG']
    .mean()
    .sort_values('DATE')  # px.line connects points in row order
)
fig = px.line(
    daily_avg_temp,
    x='DATE',
    y='TAVG',
    color='COUNTRY',
    title='Average temperature fluctuations in Tunisia and Cameroon',
)
fig.show()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Customize the title and axis labels, and zoom the x-axis to the 1980-2005 window.
# Only the visible range changes; the underlying data of the figure is untouched.
fig.update_layout(title='Temperature Trends Over Time',
                  xaxis_title='Date',
                  yaxis_title='Average Temperature',
                  xaxis_range=['1980-01-01', '2005-12-31'])

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Keep only the Senegal observations
senegal_mask = African_climate['COUNTRY'].eq('Senegal')
African_climate_senegal = African_climate.loc[senegal_mask]

# Split into the two comparison windows. Both bounds are inclusive, so rows dated
# in the year 2000 belong to both windows.
senegal_dates = African_climate_senegal['DATE']
African_climate_1980_2000 = African_climate_senegal.loc[senegal_dates.between('1980-01-01', '2000-12-31')]
African_climate_2000_2023 = African_climate_senegal.loc[senegal_dates.between('2000-01-01', '2023-12-31')]

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# One semi-transparent histogram per period, drawn on shared axes so the two
# distributions can be compared directly.
period_frames = {
    '1980-2000': African_climate_1980_2000,
    '2000-2023': African_climate_2000_2023,
}
for period_label, period_df in period_frames.items():
    fig.add_trace(
        go.Histogram(x=period_df['TAVG'], name=period_label, opacity=0.5, nbinsx=50)
    )

# 'overlay' draws the bars on top of each other instead of stacking them,
# which is why the traces need to be see-through.
fig.update_layout(
    barmode='overlay',
    title='Temperature Distribution in Senegal (1980-2000 vs. 2000-2023)',
    xaxis_title='Average Temperature',
    yaxis_title='Count',
    legend_title_text='Period',
)

# Render the figure
fig.show()

In [None]:
# Mean TAVG for each country, as a two-column frame (COUNTRY, TAVG)
avg_temp_per_country = African_climate.groupby('COUNTRY', as_index=False)['TAVG'].mean()

# Bar chart with one bar per country, shaded by its value on the Viridis scale
fig = px.bar(
    avg_temp_per_country,
    x='COUNTRY',
    y='TAVG',
    color='TAVG',
    color_continuous_scale='Viridis',
    title='Average Temperature per Country',
    labels={'TAVG': 'Average Temperature', 'COUNTRY': 'Country'},
)

# Render the figure
fig.show()

In [None]:
# Question: how does the average temperature differ from one country to another?
# Aggregate TAVG to a single mean value per country.
country_means = African_climate.groupby('COUNTRY')['TAVG'].agg('mean')
avg_temp_per_country = country_means.reset_index()

# Show the per-country means as a plain bar chart
bar_options = dict(
    x='COUNTRY',
    y='TAVG',
    title='Average Temperature per Country',
    labels={'TAVG': 'Average Temperature', 'COUNTRY': 'Country'},
)
fig = px.bar(avg_temp_per_country, **bar_options)
fig.show()