In [2]:
# Mount Google Drive at /content/drive so files stored on Drive (including the
# dataset loaded further down) are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Make the course folder on Drive the working directory.
# NOTE(review): the dataset below is read through an absolute path, so this step looks optional — confirm.
%cd /content/drive/MyDrive/Concepts and Technologies of AI

/content/drive/MyDrive/Concepts and Technologies of AI


In [4]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif, RFE

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Read the forest-share CSV from Drive and keep only the rows that carry a
# 'trend' value; rows without one cannot be labelled.
file_path = "/content/drive/MyDrive/AI finalprototype /goal15.forest_shares.csv"
df = pd.read_csv(file_path).dropna(subset=['trend'])

# Map a numeric trend onto the three-way label used as the target
def categorize_trend(value):
    """Return the direction label for a single trend value.

    Strictly positive values give "Increase", strictly negative values give
    "Decrease", and everything else (zero, or a value such as NaN that is
    neither above nor below zero) gives "No Change".
    """
    if value > 0:
        return "Increase"
    return "Decrease" if value < 0 else "No Change"

# Build the target column: label each trend value, then let LabelEncoder
# turn the label strings into integer class ids.
label_encoder = LabelEncoder()
df["Trend Category"] = label_encoder.fit_transform(df["trend"].apply(categorize_trend))

# Features are the two forest-coverage columns; the target is the encoded label
X, y = df[["forests_2000", "forests_2020"]], df["Trend Category"]

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features; the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [10]:
# Baseline (untuned) Logistic Regression.
# The predictions scored below were not produced by any cell that is still in
# this notebook, so a fresh "Restart & Run All" stopped here with a NameError.
# Fit the baseline model in this cell so the scores can be reproduced.
# NOTE(review): the settings of the original baseline are unknown;
# max_iter=1000 mirrors the estimator used for the grid search — confirm.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

# Predictions on both splits
y_train_pred_lr = lr_model.predict(X_train)
y_test_pred_lr = lr_model.predict(X_test)

# Calculate accuracy for Logistic Regression
train_acc_lr = accuracy_score(y_train, y_train_pred_lr)
test_acc_lr = accuracy_score(y_test, y_test_pred_lr)

print(f"Logistic Regression - Train Accuracy: {train_acc_lr:.4f}")
print(f"Logistic Regression - Test Accuracy: {test_acc_lr:.4f}")


Logistic Regression - Train Accuracy: 0.6630
Logistic Regression - Test Accuracy: 0.6522


In [11]:
# Logistic Regression accuracy on each split, printed at full precision
print(
    "Logistic Regression - Train Accuracy:",
    accuracy_score(y_true=y_train, y_pred=y_train_pred_lr),
)
print(
    "Logistic Regression - Test Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_test_pred_lr),
)


Logistic Regression - Train Accuracy: 0.6629834254143646
Logistic Regression - Test Accuracy: 0.6521739130434783


In [12]:
# Baseline Random Forest: 100 trees, no depth limit, fixed seed
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
).fit(X_train, y_train)

# Predict on the training split first, then on the test split
y_train_pred_rf, y_test_pred_rf = (
    rf_model.predict(features) for features in (X_train, X_test)
)

# Accuracy of the baseline forest on each split
print(
    "Random Forest - Train Accuracy:",
    accuracy_score(y_true=y_train, y_pred=y_train_pred_rf),
)
print(
    "Random Forest - Test Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_test_pred_rf),
)


Random Forest - Train Accuracy: 1.0
Random Forest - Test Accuracy: 0.6739130434782609


In [13]:
# Tune Logistic Regression: search regularisation strength C and the solver
# with 5-fold cross-validated accuracy on the training split.
log_reg = LogisticRegression(max_iter=1000)
param_grid_lr = {
    'C': [0.01, 0.1, 1, 10],
    'solver': ['liblinear', 'lbfgs'],
}
grid_search_lr = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid_lr,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

# Best configuration found by the search
best_log_reg = grid_search_lr.best_estimator_

In [14]:
# Tune Random Forest: search the number of trees and the depth limit
# with 5-fold cross-validated accuracy on the training split.
rf_model = RandomForestClassifier(random_state=42)
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
}
grid_search_rf = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid_rf,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

# Best configuration found by the search
best_rf = grid_search_rf.best_estimator_

In [15]:
# Evaluate the tuned Logistic Regression on the held-out test split.
# No training happens here: best_log_reg comes from the grid search above.
y_test_pred_lr = best_log_reg.predict(X_test)
print("Best Logistic Regression Parameters:", grid_search_lr.best_params_)
print("Logistic Regression Test Accuracy:", accuracy_score(y_test, y_test_pred_lr))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred_lr))
# zero_division=0 reports 0.0 for a class the model never predicts, instead of
# emitting an undefined-metric warning for each affected average.
print(
    "Classification Report:\n",
    classification_report(y_test, y_test_pred_lr, zero_division=0),
)

Best Logistic Regression Parameters: {'C': 10, 'solver': 'lbfgs'}
Logistic Regression Test Accuracy: 0.717391304347826
Confusion Matrix:
 [[18  4  0]
 [ 0 15  0]
 [ 1  8  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.82      0.88        22
           1       0.56      1.00      0.71        15
           2       0.00      0.00      0.00         9

    accuracy                           0.72        46
   macro avg       0.50      0.61      0.53        46
weighted avg       0.63      0.72      0.65        46



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
# Rank the features with the ANOVA F-test and keep the top-scoring ones.
# k is capped at the number of available columns: the feature matrix has only
# two, and asking SelectKBest for more than exist is invalid (depending on the
# scikit-learn release it either warns and keeps everything, or raises).
selector = SelectKBest(score_func=f_classif, k=min(3, X_train.shape[1]))
X_new = selector.fit_transform(X_train, y_train)

# X still holds the original column names, so map the boolean support mask
# back onto them for a readable result.
print("Selected Features:", X.columns[selector.get_support()])

Selected Features: Index(['forests_2000', 'forests_2020'], dtype='object')




In [17]:
# Score the tuned Random Forest (best estimator from the grid search)
# on the held-out test split.
y_test_pred_rf = best_rf.predict(X_test)

# Store evaluation metrics: winning parameters, then accuracy, confusion
# matrix and per-class report computed from the same predictions.
rf_params = grid_search_rf.best_params_
rf_accuracy, rf_conf_matrix, rf_class_report = (
    metric(y_test, y_test_pred_rf)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)

# Display results
print("Best Random Forest Parameters:", rf_params)
print("\nRandom Forest Test Accuracy:", rf_accuracy)
print("\nConfusion Matrix:\n", rf_conf_matrix)
print("\nClassification Report:\n", rf_class_report)


Best Random Forest Parameters: {'max_depth': 20, 'n_estimators': 100}

Random Forest Test Accuracy: 0.6739130434782609

Confusion Matrix:
 [[18  2  2]
 [ 3  9  3]
 [ 3  2  4]]

Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.82      0.78        22
           1       0.69      0.60      0.64        15
           2       0.44      0.44      0.44         9

    accuracy                           0.67        46
   macro avg       0.63      0.62      0.62        46
weighted avg       0.67      0.67      0.67        46



In [18]:
# Test accuracy of each model, computed from the most recent test-set
# predictions stored in y_test_pred_lr and y_test_pred_rf.
lr_accuracy = accuracy_score(y_test, y_test_pred_lr)
rf_accuracy = accuracy_score(y_test, y_test_pred_rf)

# Compare models. A tie is reported as a tie: the previous plain `else`
# declared Random Forest the winner whenever the two scores were equal.
if lr_accuracy > rf_accuracy:
    print("Final Model: Logistic Regression performed better.")
elif rf_accuracy > lr_accuracy:
    print("Final Model: Random Forest performed better.")
else:
    print("Final Model: Logistic Regression and Random Forest performed equally well.")


Final Model: Logistic Regression performed better.
