<a href="https://colab.research.google.com/github/sinchanMS12/Text-Summarization-and-Optimization/blob/main/test_rank_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Step 1: Data Upload
def upload_data(file_path="/content/amazon_trends.csv"):
    """Load the trends CSV into a DataFrame and print its shape.

    Args:
        file_path: Location of the CSV file. Defaults to the Colab path this
            notebook was written against, so existing zero-argument calls
            behave as before.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If no file exists at ``file_path``. The message
            names the path and explains how to recover.
    """
    try:
        dataset = pd.read_csv(file_path)
    except FileNotFoundError as err:
        # Same exception type as before, but with a hint: on Colab the file
        # has to be uploaded to the runtime before this cell is executed.
        raise FileNotFoundError(
            f"Dataset not found at {file_path!r}; upload the CSV to that "
            "location or pass its actual path as file_path."
        ) from err
    print("Dataset loaded with shape:", dataset.shape)
    return dataset

# Step 2: Data Preprocessing
def preprocess_data(dataset):
    """Clean, encode and scale the dataset; return features and target.

    Processing steps, in order:
      1. Drop every row that contains a missing value.
      2. If a ``Date`` column exists, replace it with numeric ``Year``,
         ``Month`` and ``Day`` columns.
      3. Label-encode every object-dtype column (the target included).
      4. Split off the target column and standardise the remaining features.

    The target is the last column of the input, ignoring ``Date``. It is
    identified by name *before* the date expansion, because the derived
    ``Year``/``Month``/``Day`` columns are appended at the end of the frame
    and would otherwise be mistaken for the target.

    Args:
        dataset: Input DataFrame. It is not modified; all work happens on a
            copy.

    Returns:
        Tuple ``(X, y)``: ``X`` is the standardised feature matrix as a NumPy
        array, ``y`` the target values as a NumPy array.

    Raises:
        ValueError: If no rows remain after dropping missing values, or if
            there is no column other than ``Date`` to use as the target.
    """
    print("First few rows of the dataset before preprocessing:")
    print(dataset.head())

    # Explicit copy: the column assignments below must not write through to
    # the caller's frame or trigger pandas' chained-assignment warning.
    dataset = dataset.dropna().copy()
    print("Dataset shape after dropping missing values:", dataset.shape)

    if len(dataset) == 0:
        raise ValueError("No rows left after dropping missing values.")

    # Pin the target by name now; positions shift once Date is expanded.
    target_candidates = [name for name in dataset.columns if name != 'Date']
    if not target_candidates:
        raise ValueError("Dataset has no column to use as the target.")
    target_column = target_candidates[-1]

    if 'Date' in dataset.columns:
        dataset['Date'] = pd.to_datetime(dataset['Date'])
        dataset['Year'] = dataset['Date'].dt.year
        dataset['Month'] = dataset['Date'].dt.month
        dataset['Day'] = dataset['Date'].dt.day
        dataset = dataset.drop('Date', axis=1)

    # A fresh encoder per column: each column gets its own integer codes.
    for column in dataset.select_dtypes(include=['object']).columns:
        encoder = LabelEncoder()
        dataset[column] = encoder.fit_transform(dataset[column])

    X = dataset.drop(columns=[target_column]).values
    y = dataset[target_column].values

    print("Feature shape:", X.shape)
    print("Target shape:", y.shape)

    # NOTE(review): the scaler is fitted on every row, before evaluate_model
    # performs its train/test split, so held-out rows influence the scaling.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y

# Step 3: Model Training and Evaluation
def evaluate_model(X, y, n_neighbors):
    """Train a KNN classifier on an 80/20 split and score it.

    Args:
        X: Feature matrix.
        y: Target labels, one per row of ``X``.
        n_neighbors: Number of neighbours the classifier votes over.

    Returns:
        Accuracy of the fitted classifier on the held-out 20% of the rows.
    """
    # Fixed random_state: every call sees the same partition, so accuracies
    # obtained for different n_neighbors values are directly comparable.
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    train_features, test_features, train_labels, test_labels = split

    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    classifier.fit(train_features, train_labels)

    predicted_labels = classifier.predict(test_features)
    return accuracy_score(test_labels, predicted_labels)

# Test Rank Algorithm for Optimization
def test_rank_optimization(X, y, max_neighbors=50):
    """Search for the neighbour count that gives the highest KNN accuracy.

    Every candidate from 1 up to the search limit is scored with
    ``evaluate_model``; the accuracies are printed, plotted, and the best
    candidate is returned.

    Args:
        X: Feature matrix.
        y: Target labels, one per row of ``X``.
        max_neighbors: Largest neighbour count to try. Defaults to 50, the
            limit this function originally hard-coded.

    Returns:
        The neighbour count with the highest accuracy. On ties the smallest
        such count wins, because ``np.argmax`` returns the first maximum.

    Raises:
        ValueError: If there are too few samples to evaluate even one
            neighbour count.
    """
    # evaluate_model holds out 20% of the rows (rounded up), and a KNN model
    # cannot be queried for more neighbours than it has training rows. Cap
    # the search there instead of crashing part-way through on small data.
    n_samples = len(X)
    n_train = n_samples - int(np.ceil(0.2 * n_samples))
    upper_bound = min(max_neighbors, n_train)
    if upper_bound < 1:
        raise ValueError(
            "Not enough samples to evaluate any neighbor count "
            f"(samples={n_samples}, max_neighbors={max_neighbors})."
        )

    accuracies = []
    neighbor_counts = list(range(1, upper_bound + 1))

    for n in neighbor_counts:
        accuracy = evaluate_model(X, y, n)
        accuracies.append(accuracy)
        print(f"Test Rank: Neighbors = {n}, Accuracy = {accuracy:.4f}")

    best_accuracy_index = np.argmax(accuracies)
    best_neighbors = neighbor_counts[best_accuracy_index]

    plt.figure(figsize=(10, 6))
    plt.plot(neighbor_counts, accuracies, marker='o', color='blue', label='Accuracy vs. Neighbors')
    plt.xlabel('Number of Neighbors')
    plt.ylabel('Accuracy')
    plt.title('KNN Accuracy for Different Number of Neighbors')
    plt.legend()
    plt.grid(True)
    plt.show()

    return best_neighbors

# Main execution function
def main():
    """Run the whole pipeline: load, preprocess, tune k, report accuracy."""
    features, labels = preprocess_data(upload_data())

    best_k = test_rank_optimization(features, labels)
    print(f"Optimal number of neighbors: {best_k}")

    # Re-score the chosen neighbour count so the final figure is reported
    # on its own line, separate from the per-candidate search output.
    final_accuracy = evaluate_model(features, labels, best_k)
    print(f"Accuracy with optimal number of neighbors: {final_accuracy:.2f}")

# Execute the pipeline only when run as a script, not when imported.
if __name__ == "__main__":
    main()


FileNotFoundError: [Errno 2] No such file or directory: '/content/amazon_trends.csv'