In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [5]:
# Read the mobile dataset from CSV; the relative path is resolved against
# the kernel's current working directory.
DATA_PATH = "Processed_Mobile_dataset_5.csv"
df = pd.read_csv(DATA_PATH)

In [7]:
# Coerce both memory columns to numeric; errors='coerce' turns any value
# that cannot be parsed into NaN instead of raising.
memory_cols = ['RAM (GB)', 'ROM (GB)']
for col in memory_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Discard every row where either memory column is NaN after coercion.
df.dropna(subset=memory_cols, inplace=True)

# Define price category function
def price_category(price, low_max=10000, mid_max=20000):
    """Map a numeric price to a coarse price band.

    Parameters
    ----------
    price : int or float
        Price to classify.
    low_max : int or float, default 10000
        Inclusive upper bound of the 'Low' band.
    mid_max : int or float, default 20000
        Inclusive upper bound of the 'Mid' band.

    Returns
    -------
    str
        'Low' if ``price <= low_max``, 'Mid' if ``price <= mid_max``,
        otherwise 'High'.

    Raises
    ------
    ValueError
        If ``low_max`` is greater than ``mid_max``, because the 'Mid'
        band would then be unreachable.

    Notes
    -----
    A NaN price fails both ``<=`` comparisons and is therefore labelled
    'High'; filter missing prices beforehand if that is not intended.
    """
    if low_max > mid_max:
        raise ValueError("low_max must not exceed mid_max")
    if price <= low_max:
        return 'Low'
    if price <= mid_max:
        return 'Mid'
    return 'High'

# Bucket each price into 'Low' / 'Mid' / 'High' using price_category.
df['Price_Category'] = df['Price (₹)'].apply(price_category)

# Encode the string categories as integer class labels.
le = LabelEncoder()
df['Price_Label'] = le.fit_transform(df['Price_Category'])

# Features are all remaining columns; the raw price and both columns
# derived from it are dropped so the target is not present in X.
X = df.drop(columns=['Price (₹)', 'Price_Category', 'Price_Label'])
y = df['Price_Label']

# NOTE(review): this whole-dataset scaling is NOT used for the
# cross-validation below. It is retained only in case other cells rely on
# `scaler` / `X_scaled` -- remove it if nothing else uses these names.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize the Random Forest model
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Put scaling inside the pipeline so that, within each fold, the scaler is
# fitted on the training split only. Fitting it on the full dataset before
# cross-validation would let validation-fold statistics leak into the
# preprocessing step.
cv_model = make_pipeline(StandardScaler(), rf)

# Apply 5-fold cross-validation on the unscaled features; the pipeline
# performs the scaling per fold.
cv_scores = cross_val_score(cv_model, X, y, cv=5)

# Print the average accuracy across the folds
print("Average Cross-Validation Accuracy:", cv_scores.mean())
print("Cross-Validation Scores for Each Fold:", cv_scores)

Average Cross-Validation Accuracy: 0.9055166217430368
Cross-Validation Scores for Each Fold: [0.87735849 0.90566038 0.90566038 0.91509434 0.92380952]
