# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, mean_absolute_error, mean_squared_error
import time

# Load the fake-news dataset from disk.
data = pd.read_csv(
    '/Users/mustafacayli/Desktop/ödevler/veri madenciligi/ödev2/fake_news_dataset.csv'
)

# Classification target.
y_target = data['target']

# Regression target.
y_score = data['score']

# Turn the text column into a TF-IDF matrix, ignoring English stop words.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X_text = tfidf_vectorizer.fit_transform(data['tweet'])

# Classifiers to benchmark, keyed by the label shown in the results table.
# One fresh, unfitted estimator is created for every combination of feature
# label ('TF-IDF', 'n-gram (most popular)'), variant suffix ('', '+FS', '+PCA')
# and algorithm, in that nesting order.
# NOTE(review): the training loop in this file fits every entry on the same
# TF-IDF split, so the '+FS', '+PCA' and 'n-gram' parts of the keys are labels
# only -- no feature selection, PCA or n-gram features are applied. Confirm
# whether those pipelines are still to be implemented.
classification_models = {
    f'{algorithm}({features}){suffix}': make_model()
    for features in ('TF-IDF', 'n-gram (most popular)')
    for suffix in ('', '+FS', '+PCA')
    for algorithm, make_model in (
        ('KNN', lambda: KNeighborsClassifier(n_neighbors=5)),
        ('Naive Bayes', MultinomialNB),
        ('Decision Tree', DecisionTreeClassifier),
    )
}

# Models evaluated against the regression target, keyed by the label shown in
# the results table. One fresh, unfitted estimator is created for every
# combination of feature label, variant suffix and algorithm, in that nesting
# order.
# NOTE(review): MultinomialNB is a classifier, not a regressor, so the
# 'Naive Bayes Reg' entries will treat the target values as class labels (and
# may be rejected outright if the target is continuous) -- confirm the intent.
# NOTE(review): as with the classifiers, the '+FS', '+PCA' and 'n-gram' parts
# of the keys are labels only; the training loop in this file fits every entry
# on the same TF-IDF split.
regression_models = {
    f'{algorithm} Reg({features}){suffix}': make_model()
    for features in ('TF-IDF', 'n-gram (most popular)')
    for suffix in ('', '+FS', '+PCA')
    for algorithm, make_model in (
        ('KNN', lambda: KNeighborsRegressor(n_neighbors=5)),
        ('Naive Bayes', MultinomialNB),
        ('Decision Tree', DecisionTreeRegressor),
    )
}

# Empty results table for the classifiers. 'Testing Time' is declared up front
# because the classification loop records it for every model; previously the
# column only appeared implicitly when the first row was concatenated.
classification_results = pd.DataFrame(columns=[
    'Method', 'Accuracy', 'F-measure', 'Precision', 'Recall',
    'Number of Features', 'Training Time', 'Testing Time',
])

# Empty results table for the regression models.
regression_results = pd.DataFrame(columns=[
    'Method', 'MAE', 'RMSE',
    'Number of Features', 'Training Time', 'Testing Time',
])

# Hold out 20% of the rows for testing. A single call with random_state=42
# cuts every array on the same shuffled indices; X_text is passed twice so the
# classification and regression targets each get their own feature split.
(
    X_train_text, X_test_text,
    y_train_target, y_test_target,
    X_train_score, X_test_score,
    y_train_score, y_test_score,
) = train_test_split(
    X_text, y_target, X_text, y_score,
    test_size=0.2,
    random_state=42,
)

# Train and evaluate every classifier, recording quality metrics and timings.
#
# Rows are collected in a list and the DataFrame is built once at the end.
# Calling pd.concat inside the loop re-copies the whole table on every
# iteration, leaves every row with index 0, and starts by concatenating onto
# an empty frame.
#
# NOTE(review): every model is fitted on the same TF-IDF split, so the '+FS',
# '+PCA' and 'n-gram' entries differ from the plain ones by name only.
# NOTE(review): f1/precision/recall are called with scikit-learn's default
# averaging, which assumes a binary `target` -- confirm against the data.
classification_rows = []
num_features = X_train_text.shape[1]  # identical for every model
for name, model in classification_models.items():
    # perf_counter is monotonic and high-resolution, unlike wall-clock time().
    train_start = time.perf_counter()
    model.fit(X_train_text, y_train_target)
    train_time = time.perf_counter() - train_start

    test_start = time.perf_counter()
    y_pred = model.predict(X_test_text)
    test_time = time.perf_counter() - test_start

    accuracy = accuracy_score(y_test_target, y_pred)
    f_measure = f1_score(y_test_target, y_pred)
    precision = precision_score(y_test_target, y_pred)
    recall = recall_score(y_test_target, y_pred)

    classification_rows.append({
        'Method': name,
        'Accuracy': accuracy,
        'F-measure': f_measure,
        'Precision': precision,
        'Recall': recall,
        'Number of Features': num_features,
        'Training Time': train_time,
        'Testing Time': test_time,
    })

# Explicit columns keep the schema stable even when no model was evaluated.
classification_results = pd.DataFrame(classification_rows, columns=[
    'Method', 'Accuracy', 'F-measure', 'Precision', 'Recall',
    'Number of Features', 'Training Time', 'Testing Time',
])

# Train and evaluate every regression model, recording error metrics and
# timings.
#
# Fixes over the previous version:
#   * Training and testing time are now measured per model here. Before, the
#     loop reused `train_time` / `test_time` left over from the last
#     classifier, so every regression row reported the same stale timings.
#   * RMSE is the square root of the MSE instead of relying on the
#     `squared=False` argument, which newer scikit-learn releases no longer
#     accept.
#   * Rows are collected in a list and the DataFrame is built once, instead of
#     calling pd.concat on every iteration (quadratic copying, duplicate index).
regression_rows = []
num_features = X_train_text.shape[1]  # identical for every model
for name, model in regression_models.items():
    # perf_counter is monotonic and high-resolution, unlike wall-clock time().
    train_start = time.perf_counter()
    model.fit(X_train_text, y_train_score)
    train_time = time.perf_counter() - train_start

    test_start = time.perf_counter()
    y_pred_score = model.predict(X_test_text)
    test_time = time.perf_counter() - test_start

    mae = mean_absolute_error(y_test_score, y_pred_score)
    rmse = mean_squared_error(y_test_score, y_pred_score) ** 0.5

    regression_rows.append({
        'Method': name,
        'MAE': mae,
        'RMSE': rmse,
        'Number of Features': num_features,
        'Training Time': train_time,
        'Testing Time': test_time,
    })

# Explicit columns keep the schema stable even when no model was evaluated.
regression_results = pd.DataFrame(regression_rows, columns=[
    'Method', 'MAE', 'RMSE',
    'Number of Features', 'Training Time', 'Testing Time',
])

# Print both result tables, each under its heading, one item per line.
print(
    "Classification Results:",
    classification_results,
    "\nRegression Results:",
    regression_results,
    sep="\n",
)
