In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Load data
df = pd.read_csv("seattle-weather.csv")  # Replace with the actual file path

# Preprocessing
df.dropna(inplace=True)
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['year'] = df['date'].dt.year
df.drop('date', axis=1, inplace=True)

# Encode categorical target variable
df['weather'] = df['weather'].astype('category').cat.codes

# Features and labels
X = df[['precipitation', 'temp_max', 'temp_min', 'wind', 'month', 'day', 'year']]
y = df['weather']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree
clf = DecisionTreeClassifier(max_depth=5, random_state=42)
clf.fit(X_train, y_train)

# Evaluate model
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the model
joblib.dump(clf, "decision_tree_weather.pkl")


Accuracy: 0.8191126279863481
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.11      0.17         9
           1       1.00      0.20      0.33        25
           2       0.87      0.92      0.89       120
           3       0.50      0.38      0.43         8
           4       0.79      0.92      0.85       131

    accuracy                           0.82       293
   macro avg       0.70      0.51      0.53       293
weighted avg       0.82      0.82      0.79       293



['decision_tree_weather.pkl']

In [3]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp310-cp310-win_amd64.whl.metadata (13 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp310-cp310-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
    --------------------------------------- 0.3/11.0 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.0 MB 1.4 MB/s eta 0:00:08
   -- ------------------------------------- 0.8/11.0 MB 1.5 MB/s eta 0:00:07
   --- ------------------------------------ 1.0/11.0 MB 1.5 MB/s eta 0:00:07
   ----- ---------------------------------- 1.6/11.0 MB 1.6 MB/s eta 0:00:07
   ------ --------------------------------- 