In [1]:
import pandas as pd

In [7]:
# Relative location of the raw weather observations CSV.
path = "Weather_Data.csv"
# Read the whole file into a DataFrame with pandas' default parsing.
weather_data = pd.read_csv(filepath_or_buffer=path)
# Plain-text rendering of the frame (pandas elides the middle rows/columns with "...").
print(weather_data)

          Date  Temp9am  Temp3pm  MinTemp  MaxTemp  Rainfall RainToday  \
0     01-02-13     20.7     20.9     19.5     22.4      15.6       Yes   
1     02-02-13     22.4     24.8     19.5     25.6       6.0       Yes   
2     03-02-13     23.5     23.0     21.6     24.5       6.6       Yes   
3     04-02-13     21.4     20.9     20.2     22.8      18.8       Yes   
4     05-02-13     22.5     25.5     19.7     25.7      77.4       Yes   
...        ...      ...      ...      ...      ...       ...       ...   
3266  11-01-22     10.5     17.9      8.6     19.6       0.0        No   
3267  12-01-22     11.0     18.7      9.3     19.2       0.0        No   
3268  13-01-22     10.2     17.3      9.4     17.7       0.0        No   
3269  14-01-22     12.4     19.0     10.1     19.3       0.0        No   
3270  15-01-22      9.4     18.8      7.6     19.3       0.0        No   

      Evaporation  Sunshine WindGustDir  ...  WindDir9am WindDir3pm  \
0             6.2       0.0           W 

### Average Wind Speed Analysis

In [11]:
# Column means of the two wind-speed readings, unpacked in column order (9 AM, then 3 PM).
avg_wind_speed_9am, avg_wind_speed_3pm = (
    weather_data[column].mean() for column in ('WindSpeed9am', 'WindSpeed3pm')
)
print(f'Average Wind Speed at 9 AM: {avg_wind_speed_9am}')
print(f'Average Wind Speed at 3 PM: {avg_wind_speed_3pm}')


Average Wind Speed at 9 AM: 15.077040660348517
Average Wind Speed at 3 PM: 19.294405380617548


### Average Humidity Analysis

In [12]:
# Column means of the two humidity readings, unpacked in column order (9 AM, then 3 PM).
avg_humidity_9am, avg_humidity_3pm = (
    weather_data[column].mean() for column in ('Humidity9am', 'Humidity3pm')
)
print(f'Average Humidity at 9 AM: {avg_humidity_9am}')
print(f'Average Humidity at 3 PM: {avg_humidity_3pm}')


Average Humidity at 9 AM: 68.24396209110364
Average Humidity at 3 PM: 54.69856313054112


### Average Pressure Analysis

In [13]:
# Column means of the two pressure readings, unpacked in column order (9 AM, then 3 PM).
avg_pressure_9am, avg_pressure_3pm = (
    weather_data[column].mean() for column in ('Pressure9am', 'Pressure3pm')
)
print(f'Average Pressure at 9 AM: {avg_pressure_9am}')
print(f'Average Pressure at 3 PM: {avg_pressure_3pm}')


Average Pressure at 9 AM: 1018.3344237236321
Average Pressure at 3 PM: 1016.0030852950167


### Average Temperature Analysis

In [14]:
# Column means of the four temperature columns; the unpacking order matches the tuple below.
avg_temp_9am, avg_temp_3pm, avg_min_temp, avg_max_temp = (
    weather_data[column].mean()
    for column in ('Temp9am', 'Temp3pm', 'MinTemp', 'MaxTemp')
)
print(f'Average Temperature at 9 AM: {avg_temp_9am}')
print(f'Average Temperature at 3 PM: {avg_temp_3pm}')
print(f'Average Minimum Temperature: {avg_min_temp}')
print(f'Average Maximum Temperature: {avg_max_temp}')


Average Temperature at 9 AM: 17.821461326811374
Average Temperature at 3 PM: 21.543656374197493
Average Minimum Temperature: 14.877101803729746
Average Maximum Temperature: 23.00556404769184


### Rainfall Analysis

In [15]:
# Sum of the Rainfall column over every row.
total_rainfall = weather_data['Rainfall'].sum()
# Number of rows whose RainToday flag is exactly 'Yes' (a boolean mask sums to its True count).
rainy_days_count = (weather_data['RainToday'] == 'Yes').sum()
print(f'Total Rainfall: {total_rainfall}')
print(f'Number of Rainy Days: {rainy_days_count}')


Total Rainfall: 10932.2
Number of Rainy Days: 849


### Data Cleansing

In [24]:
# Show every column's dtype followed by the leading rows, each as plain text on its own line(s).
print(weather_data.dtypes, weather_data.head(), sep='\n')


Date             datetime64[ns]
Temp9am                 float64
Temp3pm                 float64
MinTemp                 float64
MaxTemp                 float64
Rainfall                float64
RainToday                object
Evaporation             float64
Sunshine                float64
WindGustDir              object
WindGustSpeed             int64
WindDir9am               object
WindDir3pm               object
WindSpeed9am              int64
WindSpeed3pm              int64
Humidity9am               int64
Humidity3pm               int64
Pressure9am             float64
Pressure3pm             float64
Cloud9am                  int64
Cloud3pm                  int64
dtype: object
        Date  Temp9am  Temp3pm  MinTemp  MaxTemp  Rainfall RainToday  \
0 2013-02-01     20.7     20.9     19.5     22.4      15.6       Yes   
1 2013-02-02     22.4     24.8     19.5     25.6       6.0       Yes   
2 2013-02-03     23.5     23.0     21.6     24.5       6.6       Yes   
3 2013-02-04     21.4     

In [25]:
# Force the measurement columns to numeric dtype; errors='coerce' turns any value
# that cannot be parsed into NaN instead of raising.
for numeric_column in ('Rainfall', 'Temp9am', 'Temp3pm'):
    weather_data[numeric_column] = pd.to_numeric(weather_data[numeric_column], errors='coerce')



In [26]:
# Per-column count of missing values, to reveal any NaNs introduced by the numeric coercion
print(weather_data.isna().sum())

Date             0
Temp9am          0
Temp3pm          0
MinTemp          0
MaxTemp          0
Rainfall         0
RainToday        0
Evaporation      0
Sunshine         0
WindGustDir      0
WindGustSpeed    0
WindDir9am       0
WindDir3pm       0
WindSpeed9am     0
WindSpeed3pm     0
Humidity9am      0
Humidity3pm      0
Pressure9am      0
Pressure3pm      0
Cloud9am         0
Cloud3pm         0
dtype: int64


In [28]:
# Remove, in place, every row that is missing any of the three coerced measurement columns.
weather_data.dropna(
    axis=0,
    how='any',
    subset=['Rainfall', 'Temp9am', 'Temp3pm'],
    inplace=True,
)

In [29]:
# Parse the day-month-two-digit-year strings (e.g. "01-02-13") into datetime values,
# then store them back over the Date column.
parsed_dates = pd.to_datetime(weather_data['Date'], format='%d-%m-%y')
weather_data['Date'] = parsed_dates

In [30]:
# Final inspection of cleaned data.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
weather_data.info()
# Leading rows of the cleaned frame as plain text.
print(weather_data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3271 entries, 0 to 3270
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           3271 non-null   datetime64[ns]
 1   Temp9am        3271 non-null   float64       
 2   Temp3pm        3271 non-null   float64       
 3   MinTemp        3271 non-null   float64       
 4   MaxTemp        3271 non-null   float64       
 5   Rainfall       3271 non-null   float64       
 6   RainToday      3271 non-null   object        
 7   Evaporation    3271 non-null   float64       
 8   Sunshine       3271 non-null   float64       
 9   WindGustDir    3271 non-null   object        
 10  WindGustSpeed  3271 non-null   int64         
 11  WindDir9am     3271 non-null   object        
 12  WindDir3pm     3271 non-null   object        
 13  WindSpeed9am   3271 non-null   int64         
 14  WindSpeed3pm   3271 non-null   int64         
 15  Humidity9am    3271 n

In [31]:
# Write the cleaned frame to a CSV at a relative path; index=False leaves the row index out of the file.
weather_data.to_csv(path_or_buf='Cleaned_Weather_Data.csv', index=False)

In [32]:
# NOTE: despite its name, `df` holds the cleaned file's path (a string), not a DataFrame.
df = "Cleaned_Weather_Data.csv"
# Read the saved file back; as the cell's last expression, the resulting frame is displayed.
pd.read_csv(filepath_or_buffer=df)

Unnamed: 0,Date,Temp9am,Temp3pm,MinTemp,MaxTemp,Rainfall,RainToday,Evaporation,Sunshine,WindGustDir,...,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm
0,2013-02-01,20.7,20.9,19.5,22.4,15.6,Yes,6.2,0.0,W,...,S,SSW,17,20,92,84,1017.6,1017.4,8,8
1,2013-02-02,22.4,24.8,19.5,25.6,6.0,Yes,3.4,2.7,W,...,W,E,9,13,83,73,1017.9,1016.4,7,7
2,2013-02-03,23.5,23.0,21.6,24.5,6.6,Yes,2.4,0.1,W,...,ESE,ESE,17,2,88,86,1016.7,1015.6,7,8
3,2013-02-04,21.4,20.9,20.2,22.8,18.8,Yes,2.2,0.0,W,...,NNE,E,22,20,83,90,1014.2,1011.8,8,8
4,2013-02-05,22.5,25.5,19.7,25.7,77.4,Yes,4.8,0.0,W,...,NNE,W,11,6,88,74,1008.3,1004.8,8,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3266,2022-01-11,10.5,17.9,8.6,19.6,0.0,No,2.0,7.8,SSE,...,W,SSE,22,20,73,52,1025.9,1025.3,2,2
3267,2022-01-12,11.0,18.7,9.3,19.2,0.0,No,2.0,9.2,W,...,W,ESE,20,7,78,53,1028.5,1024.6,2,2
3268,2022-01-13,10.2,17.3,9.4,17.7,0.0,No,2.4,2.7,W,...,WNW,N,15,13,85,56,1020.8,1015.0,6,6
3269,2022-01-14,12.4,19.0,10.1,19.3,0.0,No,1.4,9.3,W,...,W,W,17,19,56,35,1017.3,1015.1,5,2


In [34]:
# Re-read the original CSV from `path` and display it as the cell's result.
pd.read_csv(filepath_or_buffer=path)

Unnamed: 0,Date,Temp9am,Temp3pm,MinTemp,MaxTemp,Rainfall,RainToday,Evaporation,Sunshine,WindGustDir,...,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm
0,01-02-13,20.7,20.9,19.5,22.4,15.6,Yes,6.2,0.0,W,...,S,SSW,17,20,92,84,1017.6,1017.4,8,8
1,02-02-13,22.4,24.8,19.5,25.6,6.0,Yes,3.4,2.7,W,...,W,E,9,13,83,73,1017.9,1016.4,7,7
2,03-02-13,23.5,23.0,21.6,24.5,6.6,Yes,2.4,0.1,W,...,ESE,ESE,17,2,88,86,1016.7,1015.6,7,8
3,04-02-13,21.4,20.9,20.2,22.8,18.8,Yes,2.2,0.0,W,...,NNE,E,22,20,83,90,1014.2,1011.8,8,8
4,05-02-13,22.5,25.5,19.7,25.7,77.4,Yes,4.8,0.0,W,...,NNE,W,11,6,88,74,1008.3,1004.8,8,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3266,11-01-22,10.5,17.9,8.6,19.6,0.0,No,2.0,7.8,SSE,...,W,SSE,22,20,73,52,1025.9,1025.3,2,2
3267,12-01-22,11.0,18.7,9.3,19.2,0.0,No,2.0,9.2,W,...,W,ESE,20,7,78,53,1028.5,1024.6,2,2
3268,13-01-22,10.2,17.3,9.4,17.7,0.0,No,2.4,2.7,W,...,WNW,N,15,13,85,56,1020.8,1015.0,6,6
3269,14-01-22,12.4,19.0,10.1,19.3,0.0,No,1.4,9.3,W,...,W,W,17,19,56,35,1017.3,1015.1,5,2
