In [2]:
# weather_analyzer/main.py

import pandas as pd

# Load the weather dataset from the CSV file next to the notebook
df = pd.read_csv("weather.csv")

# Preview 5 random rows; random_state is pinned so that re-running the
# notebook from a fresh kernel shows the same rows every time.
df.sample(5, random_state=42)

Unnamed: 0,Data.Precipitation,Date.Full,Date.Month,Date.Week of,Date.Year,Station.City,Station.Code,Station.Location,Station.State,Data.Temperature.Avg Temp,Data.Temperature.Max Temp,Data.Temperature.Min Temp,Data.Wind.Direction,Data.Wind.Speed
7303,0.2,2016-06-12,6,12,2016,Tucson,TUS,"Tucson, AZ",Arizona,88,101,73,22,3.6
6261,0.5,2016-05-15,5,15,2016,Amarillo,AMA,"Amarillo, TX",Texas,60,73,47,13,9.41
9282,0.64,2016-07-24,7,24,2016,Bangor,BGR,"Bangor, ME",Maine,75,86,63,26,6.51
3341,0.03,2016-03-13,3,13,2016,Reno,RNO,"Reno, NV",Nevada,46,55,37,19,9.38
8628,1.39,2016-07-10,7,10,2016,Rockford,RFD,"Rockford, IL",Illinois,75,84,65,22,5.52


In [3]:
# Mapping from each dotted source header to its short replacement name
short_names = {
    'Data.Precipitation': 'Precipitation',
    'Date.Full': 'Date',
    'Date.Month': 'Month',
    'Date.Week of': 'Week',
    'Date.Year': 'Year',
    'Station.City': 'City',
    'Station.Code': 'Code',
    'Station.Location': 'Location',
    'Station.State': 'State',
    'Data.Temperature.Avg Temp': 'AvgTemp',
    'Data.Temperature.Max Temp': 'MaxTemp',
    'Data.Temperature.Min Temp': 'MinTemp',
    'Data.Wind.Direction': 'WindDirection',
    'Data.Wind.Speed': 'WindSpeed',
}

# Apply the mapping to df itself (the frame is modified in place)
df.rename(columns=short_names, inplace=True)

# Optional: show the resulting column names as a plain list
print("\n✅ Renamed Columns:")
print(list(df.columns))


✅ Renamed Columns:
['Precipitation', 'Date', 'Month', 'Week', 'Year', 'City', 'Code', 'Location', 'State', 'AvgTemp', 'MaxTemp', 'MinTemp', 'WindDirection', 'WindSpeed']


In [6]:
# Print a heading followed by the first 5 rows (DataFrame.head() default),
# each on its own line.
print("\n🔹 First 5 Rows:", df.head(), sep="\n")


🔹 First 5 Rows:
   Precipitation        Date  Month  Week  Year        City Code  \
0           0.00  2016-01-03      1     3  2016  Birmingham  BHM   
1           0.00  2016-01-03      1     3  2016  Huntsville  HSV   
2           0.16  2016-01-03      1     3  2016      Mobile  MOB   
3           0.00  2016-01-03      1     3  2016  Montgomery  MGM   
4           0.01  2016-01-03      1     3  2016   Anchorage  ANC   

         Location    State  AvgTemp  MaxTemp  MinTemp  WindDirection  \
0  Birmingham, AL  Alabama       39       46       32             33   
1  Huntsville, AL  Alabama       39       47       31             32   
2      Mobile, AL  Alabama       46       51       41             35   
3  Montgomery, AL  Alabama       45       52       38             32   
4   Anchorage, AK   Alaska       34       38       29             19   

   WindSpeed  
0       4.33  
1       3.86  
2       9.73  
3       6.86  
4       7.80  


In [4]:
# Basic info: rows, columns, data types
print("\n🔹 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None"
# line after the report.
df.info()


🔹 Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16743 entries, 0 to 16742
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Precipitation  16743 non-null  float64
 1   Date           16743 non-null  object 
 2   Month          16743 non-null  int64  
 3   Week           16743 non-null  int64  
 4   Year           16743 non-null  int64  
 5   City           16743 non-null  object 
 6   Code           16743 non-null  object 
 7   Location       16743 non-null  object 
 8   State          16743 non-null  object 
 9   AvgTemp        16743 non-null  int64  
 10  MaxTemp        16743 non-null  int64  
 11  MinTemp        16743 non-null  int64  
 12  WindDirection  16743 non-null  int64  
 13  WindSpeed      16743 non-null  float64
dtypes: float64(2), int64(7), object(5)
memory usage: 1.8+ MB
None


In [5]:
# Descriptive statistics as produced by DataFrame.describe()
# (count, mean, std, min, quartiles, max for the numeric columns)
numeric_summary = df.describe()

print("\n🔹 Statistical Summary:")
print(numeric_summary)


🔹 Statistical Summary:
       Precipitation         Month          Week          Year       AvgTemp  \
count   16743.000000  16743.000000  16743.000000  16743.000000  16743.000000   
mean        0.579090      6.343128     15.650242   2016.018933     56.089112   
std         0.988057      3.490723      8.923425      0.136294     18.798295   
min         0.000000      1.000000      1.000000   2016.000000    -27.000000   
25%         0.000000      3.000000      8.000000   2016.000000     44.000000   
50%         0.190000      6.000000     16.000000   2016.000000     58.000000   
75%         0.750000      9.000000     24.000000   2016.000000     71.000000   
max        20.890000     12.000000     31.000000   2017.000000    100.000000   

            MaxTemp       MinTemp  WindDirection     WindSpeed  
count  16743.000000  16743.000000   16743.000000  16743.000000  
mean      66.042406     45.642716      18.791316      6.329820  
std       19.787954     18.559263       6.461527      3.4947

🔥 Analyze Average Temperature per State

In [7]:
# Number of states to list at each end of the ranking. The printed labels
# are derived from this value so they always match the number of rows shown
# (previously the labels said "Top 10" while only 3 rows were printed).
TOP_N = 3

# Mean of AvgTemp per State, ordered from hottest to coldest
state_avg_temp = df.groupby('State')['AvgTemp'].mean().sort_values(ascending=False)

# Hottest states: the first TOP_N entries of the descending ranking
print(f"\n🌡️ Top {TOP_N} Hottest States (Avg Temp):")
print(state_avg_temp.head(TOP_N))

# Coldest states: the last TOP_N entries; because the ranking is descending,
# the coldest state is printed last
print(f"\n❄️ Top {TOP_N} Coldest States (Avg Temp):")
print(state_avg_temp.tail(TOP_N))



🌡️ Top 10 Hottest States (Avg Temp):
State
Puerto Rico    81.358491
Hawaii         76.384906
Florida        73.926101
Name: AvgTemp, dtype: float64

❄️ Top 10 Coldest States (Avg Temp):
State
North Dakota    45.166038
Minnesota       44.675472
Alaska          41.157650
Name: AvgTemp, dtype: float64
