<a href="https://colab.research.google.com/github/nurullahkilic/my-json-server/blob/main/Music_e%C4%9Fitme.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [5]:
# Read the training and test CSV files from the Colab working directory.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/train.csv', '/content/test.csv')
)


In [6]:
# Display dataset overview
def dataset_overview(data, name):
    """Print a quick textual summary of a DataFrame.

    Emits, in order: a header line, the ``DataFrame.info()`` report, the
    shape, the ``describe()`` statistics, and the per-column count of
    missing values.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to summarise.
    name : str
        Label used in the header line (e.g. "Train").

    Returns
    -------
    None
    """
    print(f"{name} Dataset Overview:")
    # info() writes its report to stdout itself and returns None, so it must
    # not be wrapped in print() -- that would append a stray "None" line.
    data.info()
    print("\nNumber of rows and columns:", data.shape)
    print(data.describe())
    print("\nMissing values:")
    print(data.isnull().sum())

# Print the same summary for each split, training set first.
for frame, label in ((train_data, "Train"), (test_data, "Test")):
    dataset_overview(frame, label)


Train Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17996 entries, 0 to 17995
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Artist Name         17996 non-null  object 
 1   Track Name          17996 non-null  object 
 2   Popularity          17568 non-null  float64
 3   danceability        17996 non-null  float64
 4   energy              17996 non-null  float64
 5   key                 15982 non-null  float64
 6   loudness            17996 non-null  float64
 7   mode                17996 non-null  int64  
 8   speechiness         17996 non-null  float64
 9   acousticness        17996 non-null  float64
 10  instrumentalness    13619 non-null  float64
 11  liveness            17996 non-null  float64
 12  valence             17996 non-null  float64
 13  tempo               17996 non-null  float64
 14  duration_in min/ms  17996 non-null  float64
 15  time_signature      17996 non

In [8]:
# Preprocessing
def preprocess_data(data, label_encoders=None):
    """Impute missing values and integer-encode object columns.

    The work is done on a copy, so the caller's DataFrame is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to clean.
    label_encoders : dict[str, LabelEncoder] or None, optional
        Encoders already fitted on another frame (typically the training
        set). A column with an entry in this mapping is encoded with that
        existing encoder, so a given category receives the same integer code
        in both frames; categories the encoder has never seen are mapped
        to -1. Columns without an entry get a freshly fitted encoder.
        The passed mapping itself is not modified.

    Returns
    -------
    tuple[pandas.DataFrame, dict[str, LabelEncoder]]
        The processed copy and the encoder used for each object column.
    """
    data = data.copy()
    encoders = dict(label_encoders) if label_encoders else {}

    # Fill gaps: column mean for numeric columns, most frequent value otherwise.
    # NOTE: the fill values are computed from the frame being processed.
    for col in data.columns:
        if data[col].isnull().any():
            if pd.api.types.is_numeric_dtype(data[col]):
                fill_value = data[col].mean()
            else:
                fill_value = data[col].mode()[0]
            data[col] = data[col].fillna(fill_value)

    # Encoding categorical variables
    for col in data.select_dtypes(include=['object']).columns:
        le = encoders.get(col)
        if le is None:
            le = LabelEncoder()
            data[col] = le.fit_transform(data[col])
            encoders[col] = le
        else:
            # LabelEncoder.transform raises on unseen labels, so translate
            # through an explicit lookup and send unknown categories to -1.
            code_for = {category: code for code, category in enumerate(le.classes_)}
            data[col] = data[col].map(code_for).fillna(-1).astype(int)

    return data, encoders

train_data, train_encoders = preprocess_data(train_data)
# Reuse the encoders fitted on the training set so that categorical codes mean
# the same thing in the test frame as they did when the models were trained.
test_data, test_encoders = preprocess_data(test_data, label_encoders=train_encoders)

In [30]:
# Separate features from the target (last column), then hold out 20% of the
# rows as a validation set with a fixed seed.
X, y = train_data.iloc[:, :-1], train_data.iloc[:, -1]

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [31]:
# Standardize features
# The scaler is fitted on the training split only; the same fitted transform is
# then applied to the validation and test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
# Select the training feature columns by name so the test matrix always has the
# same columns, in the same order, as the data the scaler was fitted on -- even
# when test_data carries extra columns (e.g. 'Predictions' once the save cell
# has been run and this cell is executed again).
X_test = scaler.transform(test_data[X.columns])

In [32]:
# Train and evaluate classifiers
def train_and_evaluate_model(model, model_name):
    """Fit ``model`` on the training split and report its validation accuracy.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_val`` and ``y_val``.

    Parameters
    ----------
    model
        Estimator exposing ``fit`` and ``predict``.
    model_name : str
        Label used in the printed accuracy line.

    Returns
    -------
    tuple
        ``(model, accuracy)`` -- the fitted estimator passed in and its
        accuracy on the validation split.
    """
    model.fit(X_train, y_train)
    validation_accuracy = accuracy_score(y_val, model.predict(X_val))
    print(f"Accuracy of {model_name}: {validation_accuracy:.2f}")
    return model, validation_accuracy


In [49]:
# Decision Tree (seeded for reproducibility)
dt_model, dt_acc = train_and_evaluate_model(
    DecisionTreeClassifier(random_state=42),
    "Decision Tree",
)

Accuracy of Decision Tree: 0.38


In [48]:
# k-Nearest Neighbors, constructed with no explicit hyperparameters
knn_model, knn_acc = train_and_evaluate_model(
    KNeighborsClassifier(),
    "k-Nearest Neighbors",
)

Accuracy of k-Nearest Neighbors: 0.43


In [47]:
# Artificial Neural Network (multi-layer perceptron, seeded, at most 300 iterations)
ann_model, ann_acc = train_and_evaluate_model(
    MLPClassifier(random_state=42, max_iter=300),
    "Artificial Neural Network",
)

Accuracy of Artificial Neural Network: 0.53




In [46]:
# Collect the validation accuracy of every classifier under its display name.
models = {
    "Decision Tree": dt_acc,
    "k-Nearest Neighbors": knn_acc,
    "Artificial Neural Network": ann_acc,
}

# One "<name>: <accuracy>" line per model, printed beneath a heading.
summary_lines = [f"{model_name}: {accuracy:.2f}" for model_name, accuracy in models.items()]
print("\nModel Performance Comparison:", *summary_lines, sep="\n")


Model Performance Comparison:
Decision Tree: 0.38
k-Nearest Neighbors: 0.43
Artificial Neural Network: 0.52


In [55]:
# Final model selection and test set prediction
# Maps each display name to the notebook variable that holds the fitted
# estimator. The k-NN entry must name the model (knn_model), not its accuracy
# (knn_acc); otherwise .predict() would be called on a float.
modelsName = {"Decision Tree": "dt_model", "k-Nearest Neighbors": "knn_model", "Artificial Neural Network": "ann_model"}
# Pick the display name with the highest validation accuracy.
best_model_name = max(models, key=models.get)
print(f"\nBest Model: {best_model_name}")
# Look the estimator up by variable name in the notebook namespace instead of
# eval()-ing a string.
best_model = globals()[modelsName[best_model_name]]
y_test_pred = best_model.predict(X_test)


Best Model: Artificial Neural Network


In [56]:
# Save predictions
# Attach the predicted labels as a 'Predictions' column on test_data (this
# modifies test_data in place), then write the whole frame to CSV without the
# index. test_data at this point is the frame returned by preprocess_data.
test_data['Predictions'] = y_test_pred
test_data.to_csv('/content/test_predictions.csv', index=False)
print("Predictions saved to 'test_predictions.csv'")

Predictions saved to 'test_predictions.csv'
