In [282]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from sklearn.metrics import accuracy_score

In [284]:
# Load the weather CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; point it at your own copy of the file.
csv_path = r"C:\Users\sakif\Downloads\seattle-weather.csv"
weather_df = pd.read_csv(csv_path)
weather_df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,1/1/2012,0.0,12.8,5.0,4.7,drizzle
1,1/2/2012,10.9,10.6,2.8,4.5,rain
2,1/3/2012,0.8,11.7,7.2,2.3,rain
3,1/4/2012,20.3,12.2,5.6,4.7,rain
4,1/5/2012,1.3,8.9,2.8,6.1,rain


In [182]:
# Count missing values per column (isna is the same check as isnull).
weather_df.isna().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [184]:
# Inspect the column dtypes as loaded from the CSV
weather_df.dtypes

date              object
precipitation    float64
temp_max         float64
temp_min         float64
wind             float64
weather           object
dtype: object

In [286]:
# Parse the 'date' strings into pandas datetimes so the .dt accessor can be used on the column.
parsed_dates = pd.to_datetime(weather_df['date'])
weather_df['date'] = parsed_dates

In [288]:
# Preview the first five rows to confirm the 'date' conversion.
weather_df.head(n=5)

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [290]:
# Re-check the dtypes after the 'date' conversion
weather_df.dtypes

date             datetime64[ns]
precipitation           float64
temp_max                float64
temp_min                float64
wind                    float64
weather                  object
dtype: object

In [300]:
# Derive calendar features (year, month, day) from the 'date' column,
# adding one new column per date part in that order.
for date_part in ('year', 'month', 'day'):
    weather_df[date_part] = getattr(weather_df['date'].dt, date_part)

In [302]:
# Preview the first five rows, now including the year/month/day columns.
weather_df.head(n=5)

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year,month,day
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle,2012,1,1
1,2012-01-02,10.9,10.6,2.8,4.5,rain,2012,1,2
2,2012-01-03,0.8,11.7,7.2,2.3,rain,2012,1,3
3,2012-01-04,20.3,12.2,5.6,4.7,rain,2012,1,4
4,2012-01-05,1.3,8.9,2.8,6.1,rain,2012,1,5


In [304]:
# Drop the original 'date' column since year/month/day were extracted from it.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it once 'date' is already gone is a
# no-op rather than a KeyError.
weather_df = weather_df.drop(columns=['date'], errors='ignore')

In [306]:
# Turn the categorical 'weather' labels into integer class ids.
# The fitted encoder is kept so predictions can be mapped back to label names later.
# NOTE(review): this overwrites 'weather' in place, so re-running the cell would
# re-fit the encoder on the already-encoded integers.
label_encoder = LabelEncoder()
weather_codes = label_encoder.fit_transform(weather_df['weather'])
weather_df['weather'] = weather_codes

In [308]:
# Separate the label column from the predictors.
target_column = 'weather'
y = weather_df[target_column]  # Target (weather type)
X = weather_df.drop(columns=[target_column])  # Features (precipitation, temperatures, wind, date parts)

In [310]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the split reproducible.
# NOTE(review): the split is not stratified by class; consider stratify=y if classes are imbalanced.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [316]:
# Create the (not yet trained) XGBoost classifier, using 'mlogloss' as its evaluation metric.
xgb_params = {'eval_metric': 'mlogloss'}
xgb_model = xgb.XGBClassifier(**xgb_params)

In [340]:
# Fit the classifier on the training split only.
xgb_model.fit(X=X_train, y=y_train)

In [346]:
# Predict the (integer-encoded) weather class for every row of the held-out test set
y_pred = xgb_model.predict(X_test)

In [348]:
# Score the held-out predictions against the true test labels and report the result as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 83.28%


In [350]:
# Build a single-row feature frame for 01/01/2016.
# The weather readings are example values; the column names and their order are
# the same as in the original cell.
forecast_inputs = {
    'precipitation': 0.0,  # Example precipitation
    'temp_max': 10.0,      # Example max temperature
    'temp_min': 3.0,       # Example min temperature
    'wind': 2.0,           # Example wind speed
    'year': 2016,
    'month': 1,
    'day': 1,
}
new_data = pd.DataFrame([forecast_inputs])

In [352]:
# Predict the weather class for the single-row `new_data` frame; the result holds the encoded class id
predicted_weather = xgb_model.predict(new_data)

In [354]:
# Convert the predicted class id back to its weather label using the encoder fitted on the 'weather' column
predicted_label = label_encoder.inverse_transform(predicted_weather)

In [356]:
# Print the final prediction as a human-readable label (first element of the decoded array)
print(f"Predicted Weather on 01/01/2016: {predicted_label[0]}")

Predicted Weather on 01/01/2016: fog


In [358]:
# Alternative: print the raw encoded class id instead of the decoded weather label
print(f"Predicted Weather on 01/01/2016 (Encoded Number): {predicted_weather[0]}")

Predicted Weather on 01/01/2016 (Encoded Number): 1
