In [37]:
import pandas as pd
from collections import defaultdict
from datetime import datetime

In [38]:
# Load the weather observations CSV into a DataFrame.
# The path is relative, so the file must sit in the kernel's working directory.
df = pd.read_csv("weather_data_new.csv")

In [39]:
# Preview the first rows of the freshly loaded frame (head() defaults to 5 rows).
df.head()

Unnamed: 0,Location,Date_Time,Temperature_C,Humidity_pct,Precipitation_mm,Wind_Speed_kmh
0,San Antonio,18-09-2015 02:23,-2.489152,86.853813,4.88581,14.469642
1,Houston,27-03-2015 01:19,17.537183,39.588093,0.418723,18.342313
2,San Jose,10-10-2015 09:44,33.858069,63.763412,8.951226,17.636
3,Dallas,22-10-2015 12:14,18.48507,61.275683,2.68004,12.697902
4,San Diego,19-10-2015 08:56,-0.284422,45.024843,9.008345,29.536394


In [40]:
# Show the column labels exactly as loaded, to confirm the names the
# following cells refer to (drops and the Date_Time / Temperature_C lookups).
print(df.columns)

Index(['Location', 'Date_Time', 'Temperature_C', 'Humidity_pct',
       'Precipitation_mm', 'Wind_Speed_kmh'],
      dtype='object')


In [41]:
# Only temperature is analysed below, so discard the humidity column.
# Reassign instead of mutating in place, and tolerate an already-missing
# column, so re-running this cell does not raise KeyError.
df = df.drop(columns='Humidity_pct', errors='ignore')

In [42]:
# Only temperature is analysed below, so discard the precipitation column.
# Reassign instead of mutating in place, and tolerate an already-missing
# column, so re-running this cell does not raise KeyError.
df = df.drop(columns='Precipitation_mm', errors='ignore')

In [43]:
# Only temperature is analysed below, so discard the wind-speed column.
# Reassign instead of mutating in place, and tolerate an already-missing
# column, so re-running this cell does not raise KeyError.
df = df.drop(columns='Wind_Speed_kmh', errors='ignore')

In [44]:
# Preview the first 5 rows again to confirm which columns remain after the drops.
df.head(5)

Unnamed: 0,Location,Date_Time,Temperature_C
0,San Antonio,18-09-2015 02:23,-2.489152
1,Houston,27-03-2015 01:19,17.537183
2,San Jose,10-10-2015 09:44,33.858069
3,Dallas,22-10-2015 12:14,18.48507
4,San Diego,19-10-2015 08:56,-0.284422


In [45]:
# Normalise the column labels by trimming surrounding whitespace from each one.
# NOTE(review): the drop cells above already address columns by their exact
# names, so this only matters for the lookups that follow — consider moving
# it directly after the load.
df.columns = df.columns.map(str.strip)

In [46]:
# --- MAP STEP: Extract (year, temperature) pairs ---
# Parse the whole timestamp column in one vectorized call instead of running
# datetime.strptime once per row via df.iterrows(), which builds a Series for
# every row. With the default errors="raise", a timestamp that does not match
# the day-month-year format still raises ValueError.
years = pd.to_datetime(df['Date_Time'], format="%d-%m-%Y %H:%M").dt.year

# Collect each temperature under its year. .tolist() yields plain Python
# ints/floats, and rows are visited in frame order, so year_temp has the same
# keys, values and insertion order as the row-by-row version.
year_temp = defaultdict(list)
for year, temperature in zip(years.tolist(), df['Temperature_C'].tolist()):
    year_temp[year].append(temperature)

In [47]:
# --- REDUCE STEP: Calculate average temperature for each year ---
# Arithmetic mean of the collected readings, keyed by year, in the same
# order the years appear in year_temp.
avg_temp_by_year = {
    yr: sum(readings) / len(readings)
    for yr, readings in year_temp.items()
}

In [48]:
# --- Determine coolest and hottest year ---
# Compare (year, average) pairs on the average and keep only the year.
# Ties resolve to the first year in dictionary order.
coolest_year = min(avg_temp_by_year.items(), key=lambda pair: pair[1])[0]
hottest_year = max(avg_temp_by_year.items(), key=lambda pair: pair[1])[0]

In [49]:
# Report each year's mean temperature, rounded to two decimals.
header = "Average Temperature by Year:"
print(header)
for year in avg_temp_by_year:
    avg = avg_temp_by_year[year]
    print(f"{year}: {avg:.2f}°C")

Average Temperature by Year:
2015: 14.97°C
2016: 13.55°C
2017: 15.88°C
2018: 14.31°C
2019: 14.67°C
2020: 15.62°C
2021: 13.70°C
2022: 15.82°C
2023: 14.85°C
2024: 13.36°C


In [50]:
# Report the years with the lowest and highest mean temperature.
summary_lines = (
    f"\nCoolest Year: {coolest_year} ({avg_temp_by_year[coolest_year]:.2f}°C)",
    f"Hottest Year: {hottest_year} ({avg_temp_by_year[hottest_year]:.2f}°C)",
)
print(*summary_lines, sep="\n")


Coolest Year: 2024 (13.36°C)
Hottest Year: 2017 (15.88°C)
