# Random Forest

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import category_encoders as ce
import os
import random
from sklearn.metrics import mean_squared_error
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns

# Random Forest experiment: for every yearly CSV found in Data/, train a
# RandomForestClassifier, evaluate it on a class-balanced hold-out set, and
# save the fitted model (.pkl) and its confusion-matrix plot (.png).

# `pickle` and `confusion_matrix` are used below but are not imported at the
# top of this cell; import them here so the cell runs on its own.
import pickle
from sklearn.metrics import confusion_matrix

data_folder = 'Data/'
data_folder2 = 'RandomForest/'
accuracies = []

# Columns removed from the frame before training.
unused_columns = ['Yönetmen', 'Senarist', 'Başroller', 'Besteci', 'Türler',
                  'Unnamed: 0', 'Unnamed: 0.1']
# Feature columns that are ordinal-encoded.
categorical_columns = ['Dizi İsimleri', 'Platform', 'Yapım Şirketi']

for year in range(1999, 2024):
    file_name = f"{year}.csv"
    file_name2 = f"{year}.pkl"
    file_name3 = f"{year}.png"
    file_path = os.path.join(data_folder, file_name)

    # Years without a data file are skipped silently.
    if not os.path.exists(file_path):
        continue

    df = pd.read_csv(file_path)
    df = df.drop(columns=unused_columns).reset_index(drop=True)

    X = df.drop(['Sonuç'], axis=1)
    y = df['Sonuç']

    encoder = ce.OrdinalEncoder(cols=categorical_columns)
    X_encoded = encoder.fit_transform(X)

    class_counts = y.value_counts()
    min_class_samples = int(class_counts.min())

    # Build a balanced test set: the same number of rows from every class.
    # Every class has at least `min_class_samples` rows by definition, so
    # sampling without replacement is always possible (the former
    # `random.choices` fallback could never be reached for a real class).
    # NOTE(review): the rarest class contributes *all* of its rows here, so it
    # has no rows left for training -- confirm this is intended.
    # NOTE(review): `random` is not seeded, so the test set differs per run.
    test_indices = []
    for c in y.unique():
        class_indices = df[df['Sonuç'] == c].index.tolist()
        test_indices.extend(random.sample(class_indices, min_class_samples))

    X_test = X_encoded.loc[test_indices]
    y_test = y.loc[test_indices]
    # Only the 80% "train" part of this split is used; the remaining 20% of
    # the non-test rows is discarded.
    X_train, _, y_train, _ = train_test_split(
        X_encoded.drop(test_indices), y.drop(test_indices),
        test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=150)
    model.fit(X_train, y_train)

    # Raw (un-encoded) test rows; not used further in this cell.
    test_data = df.loc[test_indices]

    y_pred = model.predict(X_test)

    # Pass the labels explicitly so the matrix rows/columns are guaranteed to
    # line up with the tick labels drawn on the heatmap.
    class_names = sorted(y.unique())
    cm = confusion_matrix(y_test, y_pred, labels=class_names)
    fig = plt.figure(figsize=(10, 10))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.title('Confusion Matrix')

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

    accuracies.append(accuracy)
    print(year)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("***---***")

    file_path2 = os.path.join(data_folder2, file_name2)
    file_path3 = os.path.join(data_folder2, file_name3)

    os.makedirs(data_folder2, exist_ok=True)
    with open(file_path2, 'wb') as file:
        pickle.dump(model, file)
    plt.savefig(file_path3)
    # Close the figure once it is on disk; otherwise one 10x10 figure per
    # year stays open for the whole run.
    plt.close(fig)

# Guard against a run in which no yearly file was found.
if accuracies:
    average_accuracy = sum(accuracies) / len(accuracies)
    print(f"Average accuracy for all years: {average_accuracy}")
else:
    print("No yearly data files were found; average accuracy is undefined.")

# Ada Boost

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix
import category_encoders as ce
import os
import random
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

# AdaBoost experiment: for every yearly CSV found in Data/, train an
# AdaBoostClassifier, evaluate it on a class-balanced hold-out set, and save
# the fitted model (.pkl) and its confusion-matrix plot (.png).
data_folder = 'Data/'
data_folder2 = 'AdaBoost/'
accuracies = []

# Columns removed from the frame before training.
unused_columns = ['Yönetmen', 'Senarist', 'Başroller', 'Besteci', 'Türler',
                  'Unnamed: 0', 'Unnamed: 0.1']
# Feature columns that are ordinal-encoded.
categorical_columns = ['Dizi İsimleri', 'Platform', 'Yapım Şirketi']

for year in range(1999, 2024):
    file_name = f"{year}.csv"
    file_name2 = f"{year}.pkl"
    file_name3 = f"{year}.png"
    file_path = os.path.join(data_folder, file_name)

    # Years without a data file are skipped silently.
    if not os.path.exists(file_path):
        continue

    df = pd.read_csv(file_path)
    df = df.drop(columns=unused_columns).reset_index(drop=True)

    X = df.drop(['Sonuç'], axis=1)
    y = df['Sonuç']

    encoder = ce.OrdinalEncoder(cols=categorical_columns)
    X_encoded = encoder.fit_transform(X)

    class_counts = y.value_counts()
    min_class_samples = int(class_counts.min())

    # Build a balanced test set: the same number of rows from every class.
    # Every class has at least `min_class_samples` rows by definition, so
    # sampling without replacement is always possible (the former
    # `random.choices` fallback could never be reached for a real class).
    # NOTE(review): the rarest class contributes *all* of its rows here, so it
    # has no rows left for training -- confirm this is intended.
    # NOTE(review): `random` is not seeded, so the test set differs per run.
    test_indices = []
    for c in y.unique():
        class_indices = df[df['Sonuç'] == c].index.tolist()
        test_indices.extend(random.sample(class_indices, min_class_samples))

    X_test = X_encoded.loc[test_indices]
    y_test = y.loc[test_indices]
    # Only the 80% "train" part of this split is used; the remaining 20% of
    # the non-test rows is discarded.
    X_train, _, y_train, _ = train_test_split(
        X_encoded.drop(test_indices), y.drop(test_indices),
        test_size=0.2, random_state=42)

    model = AdaBoostClassifier(n_estimators=120)
    model.fit(X_train, y_train)

    # Raw (un-encoded) test rows; not used further in this cell.
    test_data = df.loc[test_indices]

    y_pred = model.predict(X_test)

    # Pass the labels explicitly so the matrix rows/columns are guaranteed to
    # line up with the tick labels drawn on the heatmap.
    class_names = sorted(y.unique())
    cm = confusion_matrix(y_test, y_pred, labels=class_names)
    fig = plt.figure(figsize=(10, 10))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.title('Confusion Matrix')

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

    accuracies.append(accuracy)
    print(year)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("***---***")

    file_path2 = os.path.join(data_folder2, file_name2)
    file_path3 = os.path.join(data_folder2, file_name3)

    os.makedirs(data_folder2, exist_ok=True)
    with open(file_path2, 'wb') as file:
        pickle.dump(model, file)
    plt.savefig(file_path3)
    # Close the figure once it is on disk; otherwise one 10x10 figure per
    # year stays open for the whole run.
    plt.close(fig)

# Guard against a run in which no yearly file was found.
if accuracies:
    average_accuracy = sum(accuracies) / len(accuracies)
    print(f"Average accuracy for all years: {average_accuracy}")
else:
    print("No yearly data files were found; average accuracy is undefined.")

# Decision Tree

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
import category_encoders as ce
import os
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Decision Tree experiment: for every yearly CSV found in Data/, train a
# DecisionTreeClassifier, evaluate it on a class-balanced hold-out set, and
# save the fitted model (.pkl) and its confusion-matrix plot (.png).

# `pickle` is used below but is not imported at the top of this cell; import
# it here so the cell runs on its own.
import pickle

data_folder = 'Data/'
data_folder2 = 'DecisionTree/'
accuracies = []

# Columns removed from the frame before training.
unused_columns = ['Yönetmen', 'Senarist', 'Başroller', 'Besteci', 'Türler',
                  'Unnamed: 0', 'Unnamed: 0.1']
# Feature columns that are ordinal-encoded.
categorical_columns = ['Dizi İsimleri', 'Platform', 'Yapım Şirketi']

for year in range(1999, 2024):
    file_name = f"{year}.csv"
    file_name2 = f"{year}.pkl"
    file_name3 = f"{year}.png"
    file_path = os.path.join(data_folder, file_name)

    # Years without a data file are skipped silently.
    if not os.path.exists(file_path):
        continue

    df = pd.read_csv(file_path)
    df = df.drop(columns=unused_columns).reset_index(drop=True)

    X = df.drop(['Sonuç'], axis=1)
    y = df['Sonuç']

    encoder = ce.OrdinalEncoder(cols=categorical_columns)
    X_encoded = encoder.fit_transform(X)

    class_counts = y.value_counts()
    min_class_samples = int(class_counts.min())

    # Build a balanced test set: the same number of rows from every class.
    # Every class has at least `min_class_samples` rows by definition, so
    # sampling without replacement is always possible (the former
    # `random.choices` fallback could never be reached for a real class).
    # NOTE(review): the rarest class contributes *all* of its rows here, so it
    # has no rows left for training -- confirm this is intended.
    # NOTE(review): `random` is not seeded, so the test set differs per run.
    test_indices = []
    for c in y.unique():
        class_indices = df[df['Sonuç'] == c].index.tolist()
        test_indices.extend(random.sample(class_indices, min_class_samples))

    X_test = X_encoded.loc[test_indices]
    y_test = y.loc[test_indices]
    # Only the 80% "train" part of this split is used; the remaining 20% of
    # the non-test rows is discarded.
    X_train, _, y_train, _ = train_test_split(
        X_encoded.drop(test_indices), y.drop(test_indices),
        test_size=0.2, random_state=42)

    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    # Raw (un-encoded) test rows; not used further in this cell.
    test_data = df.loc[test_indices]

    y_pred = model.predict(X_test)

    # Pass the labels explicitly so the matrix rows/columns are guaranteed to
    # line up with the tick labels drawn on the heatmap.
    class_names = sorted(y.unique())
    cm = confusion_matrix(y_test, y_pred, labels=class_names)
    fig = plt.figure(figsize=(10, 10))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.title('Confusion Matrix')

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

    accuracies.append(accuracy)
    print(year)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("***---***")

    file_path2 = os.path.join(data_folder2, file_name2)
    file_path3 = os.path.join(data_folder2, file_name3)

    os.makedirs(data_folder2, exist_ok=True)
    with open(file_path2, 'wb') as file:
        pickle.dump(model, file)
    plt.savefig(file_path3)
    # Close the figure once it is on disk; otherwise one 10x10 figure per
    # year stays open for the whole run.
    plt.close(fig)

# Guard against a run in which no yearly file was found.
if accuracies:
    average_accuracy = sum(accuracies) / len(accuracies)
    print(f"Average accuracy for all years: {average_accuracy}")
else:
    print("No yearly data files were found; average accuracy is undefined.")


# LightGBM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import lightgbm as ltb
import category_encoders as ce
import os
import random

# LightGBM experiment: for every yearly CSV found in Data/, train a LightGBM
# classifier and report its accuracy on a class-balanced hold-out set.
data_folder = 'Data/'
accuracies = []

# Columns removed from the frame before training.
unused_columns = ['Yönetmen', 'Senarist', 'Başroller', 'Besteci', 'Türler',
                  'Unnamed: 0', 'Unnamed: 0.1']
# Feature columns that are ordinal-encoded. 'Sonuç' is the target and has
# already been dropped from X, so it must not be listed here.
categorical_columns = ['Dizi İsimleri', 'Platform', 'Yapım Şirketi']

for year in range(1999, 2024):
    file_name = f"{year}.csv"
    file_path = os.path.join(data_folder, file_name)

    # Years without a data file are skipped silently.
    if not os.path.exists(file_path):
        continue

    df = pd.read_csv(file_path)
    df = df.drop(columns=unused_columns).reset_index(drop=True)

    X = df.drop(['Sonuç'], axis=1)
    y = df['Sonuç']

    encoder = ce.OrdinalEncoder(cols=categorical_columns)
    X_encoded = encoder.fit_transform(X)

    class_counts = y.value_counts()
    min_class_samples = int(class_counts.min())

    # Build a balanced test set: the same number of rows from every class.
    # Every class has at least `min_class_samples` rows by definition, so
    # sampling without replacement is always possible (the former
    # `random.choices` fallback could never be reached for a real class).
    # NOTE(review): the rarest class contributes *all* of its rows here, so it
    # has no rows left for training -- confirm this is intended.
    # NOTE(review): `random` is not seeded, so the test set differs per run.
    test_indices = []
    for c in y.unique():
        class_indices = df[df['Sonuç'] == c].index.tolist()
        test_indices.extend(random.sample(class_indices, min_class_samples))

    X_test = X_encoded.loc[test_indices]
    y_test = y.loc[test_indices]
    # Only the 80% "train" part of this split is used; the remaining 20% of
    # the non-test rows is discarded.
    X_train, _, y_train, _ = train_test_split(
        X_encoded.drop(test_indices), y.drop(test_indices),
        test_size=0.2, random_state=42)

    # 'Sonuç' is treated as a class label and the result is reported as
    # accuracy, so a classifier is required: LGBMClassifier.score returns mean
    # accuracy, whereas LGBMRegressor.score returns R^2.
    model = ltb.LGBMClassifier()
    model.fit(X_train, y_train)

    # Raw (un-encoded) test rows; not used further in this cell.
    test_data = df.loc[test_indices]

    accuracy = model.score(X_test, y_test)
    accuracies.append(accuracy)
    print(f"Accuracy for year {year}: {accuracy}")

# Guard against a run in which no yearly file was found.
if accuracies:
    average_accuracy = sum(accuracies) / len(accuracies)
    print(f"Average accuracy for all years: {average_accuracy}")
else:
    print("No yearly data files were found; average accuracy is undefined.")


# Naive Bayes

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import category_encoders as ce
import os
import random
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

# Naive Bayes experiment: for every yearly CSV found in Data/, train a
# GaussianNB model, evaluate it on a class-balanced hold-out set, and save the
# fitted model (.pkl) and its confusion-matrix plot (.png).

# `pickle` and `confusion_matrix` are used below but are not imported at the
# top of this cell; import them here so the cell runs on its own.
import pickle
from sklearn.metrics import confusion_matrix

data_folder = 'Data/'
data_folder2 = 'NaiveBayes/'
accuracies = []

# Columns removed from the frame before training.
unused_columns = ['Yönetmen', 'Senarist', 'Başroller', 'Besteci', 'Türler',
                  'Unnamed: 0', 'Unnamed: 0.1']
# Feature columns that are ordinal-encoded.
categorical_columns = ['Dizi İsimleri', 'Platform', 'Yapım Şirketi']

for year in range(1999, 2024):
    file_name = f"{year}.csv"
    file_name2 = f"{year}.pkl"
    file_name3 = f"{year}.png"
    file_path = os.path.join(data_folder, file_name)

    # Years without a data file are skipped silently.
    if not os.path.exists(file_path):
        continue

    df = pd.read_csv(file_path)
    df = df.drop(columns=unused_columns).reset_index(drop=True)

    X = df.drop(['Sonuç'], axis=1)
    y = df['Sonuç']

    encoder = ce.OrdinalEncoder(cols=categorical_columns)
    X_encoded = encoder.fit_transform(X)

    class_counts = y.value_counts()
    min_class_samples = int(class_counts.min())

    # Build a balanced test set: the same number of rows from every class.
    # Every class has at least `min_class_samples` rows by definition, so
    # sampling without replacement is always possible (the former
    # `random.choices` fallback could never be reached for a real class).
    # NOTE(review): the rarest class contributes *all* of its rows here, so it
    # has no rows left for training -- confirm this is intended.
    # NOTE(review): `random` is not seeded, so the test set differs per run.
    test_indices = []
    for c in y.unique():
        class_indices = df[df['Sonuç'] == c].index.tolist()
        test_indices.extend(random.sample(class_indices, min_class_samples))

    X_test = X_encoded.loc[test_indices]
    y_test = y.loc[test_indices]
    # Only the 80% "train" part of this split is used; the remaining 20% of
    # the non-test rows is discarded.
    X_train, _, y_train, _ = train_test_split(
        X_encoded.drop(test_indices), y.drop(test_indices),
        test_size=0.2, random_state=42)

    model = GaussianNB()
    model.fit(X_train, y_train)

    # Raw (un-encoded) test rows; not used further in this cell.
    test_data = df.loc[test_indices]

    y_pred = model.predict(X_test)

    # Pass the labels explicitly so the matrix rows/columns are guaranteed to
    # line up with the tick labels drawn on the heatmap.
    class_names = sorted(y.unique())
    cm = confusion_matrix(y_test, y_pred, labels=class_names)
    fig = plt.figure(figsize=(10, 10))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.title('Confusion Matrix')

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

    accuracies.append(accuracy)
    print(year)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("***---***")

    file_path2 = os.path.join(data_folder2, file_name2)
    file_path3 = os.path.join(data_folder2, file_name3)

    os.makedirs(data_folder2, exist_ok=True)
    with open(file_path2, 'wb') as file:
        pickle.dump(model, file)
    plt.savefig(file_path3)
    # Close the figure once it is on disk; otherwise one 10x10 figure per
    # year stays open for the whole run.
    plt.close(fig)

# Guard against a run in which no yearly file was found.
if accuracies:
    average_accuracy = sum(accuracies) / len(accuracies)
    print(f"Average accuracy for all years: {average_accuracy}")
else:
    print("No yearly data files were found; average accuracy is undefined.")