In [12]:
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [13]:
# Load the car listings from a CSV file in the current working directory.
df = pd.read_csv("cardata.csv")  

In [14]:
# Display the loaded frame to eyeball the rows and column values.
df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.60,6.87,42450,Diesel,Dealer,Manual,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.00,5.90,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.00,87934,Petrol,Dealer,Manual,0
299,city,2017,11.50,12.50,9000,Diesel,Dealer,Manual,0


In [15]:
# List the column labels available for building the text features below.
df.columns

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Kms_Driven',
       'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner'],
      dtype='object')

In [16]:
# Join the three categorical descriptors into one space-separated text field
# per row; train_model() vectorizes this column.
df['Combined_Features'] = df['Fuel_Type'].str.cat(
    [df['Seller_Type'], df['Transmission']],
    sep=" ",
)

In [17]:
def train_model(df):
    """Fit a TF-IDF model on ``df['Combined_Features']`` and return row similarities.

    The pairwise cosine-similarity matrix and the fitted vectorizer are also
    pickled to ``car_recommendation_model.pkl`` and ``tfidf_vectorizer.pkl``
    in the current working directory (existing files are overwritten).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Combined_Features`` column of text documents.

    Returns
    -------
    The matrix produced by ``cosine_similarity`` for every pair of rows.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    feature_matrix = vectorizer.fit_transform(df['Combined_Features'])
    similarity = cosine_similarity(feature_matrix, feature_matrix)

    # Persist both artifacts: similarity matrix first, then the vectorizer.
    artifacts = (
        ("car_recommendation_model.pkl", similarity),
        ("tfidf_vectorizer.pkl", vectorizer),
    )
    for path, artifact in artifacts:
        with open(path, "wb") as handle:
            pickle.dump(artifact, handle)

    return similarity

    

In [18]:
def recommend_cars(selected_car, df, similarity, num_recommendations=5):
    """Return the rows of ``df`` most similar to ``selected_car``.

    Parameters
    ----------
    selected_car : str
        Value looked up in ``df['Car_Name']``; the first matching row is used
        as the query row.
    df : pandas.DataFrame
        Frame with a ``Car_Name`` column. Row *positions* (not index labels)
        must line up with the rows/columns of ``similarity``.
    similarity : 2-D array-like
        ``similarity[i][j]`` is the similarity between rows ``i`` and ``j``.
    num_recommendations : int, default 5
        Maximum number of rows to return; values below 1 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to ``num_recommendations`` rows ordered by descending similarity.
        The query row itself is never included.

    Raises
    ------
    IndexError
        If no row has ``Car_Name == selected_car``.
    """
    # Resolve the query to a row *position*: both ``similarity`` and
    # ``df.iloc`` are positional, so an index label would be wrong whenever
    # the frame does not carry the default 0..n-1 index.
    matches = (df['Car_Name'] == selected_car).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"car {selected_car!r} not found in df['Car_Name']")
    car_pos = int(matches[0])

    # Exclude the query row explicitly. Slicing off the first sorted entry is
    # not enough: when other rows tie with the query at the top score, the
    # stable sort may place one of them first and the query would be
    # recommended to itself.
    candidates = [
        (pos, score)
        for pos, score in enumerate(similarity[car_pos])
        if pos != car_pos
    ]
    # Stable sort: rows with equal scores keep their original order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    limit = max(num_recommendations, 0)
    top_positions = [pos for pos, _ in candidates[:limit]]
    return df.iloc[top_positions]


In [19]:
# Build the similarity matrix; train_model() also writes the two pickle files.
similarity_matrix = train_model(df)

In [20]:
# Inspect the pairwise cosine-similarity values returned by train_model().
similarity_matrix

array([[1.        , 0.47826333, 1.        , ..., 1.        , 0.47826333,
        1.        ],
       [0.47826333, 1.        , 0.47826333, ..., 0.47826333, 1.        ,
        0.47826333],
       [1.        , 0.47826333, 1.        , ..., 1.        , 0.47826333,
        1.        ],
       ...,
       [1.        , 0.47826333, 1.        , ..., 1.        , 0.47826333,
        1.        ],
       [0.47826333, 1.        , 0.47826333, ..., 0.47826333, 1.        ,
        0.47826333],
       [1.        , 0.47826333, 1.        , ..., 1.        , 0.47826333,
        1.        ]])