# In [None]:
# Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Train, evaluate and visualise a depth-limited decision tree on the processed
# influenza data set.

# Load the processed data
data = pd.read_csv("processed_influenza_data.csv")  # Make sure the CSV file name matches your output file name

# Define the target variable and the features used to predict it.
target = 'INF_ALL'  # adjust this if your target variable is different
# The target column must NOT be listed among the features: a model that is
# handed the label as an input simply reads it back, so the reported
# accuracy would say nothing about real predictive power (target leakage).
features = ['ISO_YEAR', 'ISO_WEEK', 'SPEC_RECEIVED_NB', 'SPEC_PROCESSED_NB', 'AH1', 'AH3']  # adjust based on your relevant features
# NOTE(review): AH1/AH3 may be sub-counts that add up to INF_ALL; if so they
# leak the target as well -- confirm against the data dictionary.

# Prepare the data for training
X = data[features]
y = data[target]
# Stratified 70/30 split keeps the class proportions of `y` in both parts.
# NOTE(review): stratification requires `y` to hold discrete class labels with
# at least two rows per class -- confirm the processed column satisfies this.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Initialize and train the Decision Tree Classifier.
# A fixed random_state makes tie-breaking between equally good splits
# reproducible, matching the seeded train/test split above.
dt_classifier = DecisionTreeClassifier(max_depth=5, random_state=42)
dt_classifier.fit(X_train, y_train)

# Predictions on the held-out test set
y_pred = dt_classifier.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy of the model:", accuracy)
print("Classification Report:\n", report)

# Plot the tree.
# class_names must be a sequence with one string per class. Passing
# str(classes_) would hand plot_tree a single string such as "[0 1]", which
# it indexes character by character and so mislabels every node.
class_labels = [str(label) for label in dt_classifier.classes_]
plt.figure(figsize=(20, 10))
plot_tree(dt_classifier, filled=True, feature_names=features, class_names=class_labels)
plt.title('Decision Tree for Influenza Data')
plt.show()
