In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Load the dataset
data = pd.read_csv('car.csv')

# Select relevant features
relevant_features = [
    'Combined Mpg For Fuel Type1',
    'Highway Mpg For Fuel Type1',
    'City Mpg For Fuel Type1',
    'Fuel Type',
    'Transmission'
]

# Remove rows with missing values in relevant features
data = data[relevant_features].dropna()

# Convert categorical variables into numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Fuel Type', 'Transmission'])

# Split the data into features and target variable
X = data.drop('Combined Mpg For Fuel Type1', axis=1)
y = data['Combined Mpg For Fuel Type1']

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

def recommend_cars(features, model, dataset, top_n=5):
    # Create a DataFrame with the given features
    input_data = pd.DataFrame([features], columns=X.columns)
    
    # Predict using the trained model
    predicted_mpg = model.predict(input_data)
    
    # Find the closest matches based on predicted Combined Mpg
    dataset['Predicted Mpg'] = abs(dataset['Combined Mpg For Fuel Type1'] - predicted_mpg)
    recommended_cars = dataset.nsmallest(top_n, 'Predicted Mpg')
    
    return recommended_cars.drop('Predicted Mpg', axis=1)

# Example: Features for recommendation
user_features = {
    'City Mpg For Fuel Type1': 25,
    'Highway Mpg For Fuel Type1': 30,
    'Fuel Type_Diesel': 0,
    'Fuel Type_Regular Gasoline': 1,
    'Transmission_Automatic 5-spd': 1,
    'Transmission_Automatic 4-spd': 0
}

# Get recommendations based on user features
recommendations = recommend_cars(user_features, rf_model, data)
print(recommendations)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv('car.csv')

# Preprocess the data
# Fill missing values with mean for numeric columns
numeric_cols = data.select_dtypes(include=['number']).columns.tolist()
data[numeric_cols] = SimpleImputer(strategy='mean').fit_transform(data[numeric_cols])

# Define the features and target variable
features = data.drop(columns=['Annual Fuel Cost For Fuel Type1', 'Fuel Type', 'baseModel'])
target = data['Annual Fuel Cost For Fuel Type1']

# Train RandomForestRegressor model
rf_model = RandomForestRegressor()
rf_model.fit(features, target)

# Function to recommend cars based on user features
def recommend_cars(features, model, dataset, top_n=5):
    input_data = pd.DataFrame([features], columns=dataset.columns)
    predicted_cost = model.predict(input_data)
    dataset['Predicted Cost'] = abs(dataset['Annual Fuel Cost For Fuel Type1'] - predicted_cost)
    return dataset.nsmallest(top_n, 'Predicted Cost')

# Define user features (you can modify these values accordingly)
user_features = {
    'City Mpg For Fuel Type1': 25,
    'Highway Mpg For Fuel Type1': 30,
    'Engine displacement': 3.0,
    'Cylinders': 6,
    'Transmission_Automatic 4-spd': 0
}

# Get recommendations based on user features
recommendations = recommend_cars(user_features, rf_model, data)
print(recommendations)


In [None]:
from sklearn.impute import SimpleImputer

# Replace NaNs with the mean for numerical columns
numeric_imputer = SimpleImputer(strategy='mean')
features_encoded_numeric = numeric_imputer.fit_transform(features_encoded)

# Replace NaNs with the most frequent value for categorical columns
categorical_imputer = SimpleImputer(strategy='most_frequent')
features_encoded_complete = categorical_imputer.fit_transform(features_encoded_numeric)

# Now train your RandomForestRegressor with the imputed data
rf_model.fit(features_encoded_complete, target)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Load your dataset
# Replace 'your_dataset.csv' with the path to your dataset
data = pd.read_csv('car.csv')

# Assuming 'Annual Fuel Cost For Fuel Type1' is the target column
target = data['Annual Fuel Cost For Fuel Type1']

# Drop the target column from features
features = data.drop(columns=['Annual Fuel Cost For Fuel Type1'])

# Perform one-hot encoding on categorical columns
features_encoded = pd.get_dummies(features)

# Instantiate RandomForestRegressor and fit the model
rf_model = RandomForestRegressor()
rf_model.fit(features_encoded, target)

# Function to recommend cars based on user features
def recommend_cars(features, model, dataset, top_n=5):
    # Process user features
    input_data = pd.DataFrame([features], columns=dataset.columns)
    input_data_encoded = pd.get_dummies(input_data)
    
    # Make predictions
    predicted_values = model.predict(input_data_encoded)
    
    # Find closest matches based on predictions
    dataset['Predicted Values'] = abs(dataset['Annual Fuel Cost For Fuel Type1'] - predicted_values)
    recommended_cars = dataset.nsmallest(top_n, 'Predicted Values')
    
    return recommended_cars

# Example of user features
user_features = {
    'City Mpg For Fuel Type1': 25,
    'Highway Mpg For Fuel Type1': 30,
    # Include other features here...
    'Transmission_Automatic 4-spd': 0
}

# Get recommendations based on user features
recommendations = recommend_cars(user_features, rf_model, data)
print(recommendations)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Load the dataset
data = pd.read_csv('car.csv')

# Clean the data
# Assuming relevant features include: 'City Mpg For Fuel Type1', 'Highway Mpg For Fuel Type1', 'Combined Mpg For Fuel Type1',
# 'Fuel Type', 'Vehicle Size Class'

relevant_features = ['City Mpg For Fuel Type1', 'Highway Mpg For Fuel Type1', 
                     'Combined Mpg For Fuel Type1', 'Fuel Type', 'Vehicle Size Class']

cleaned_data = data[relevant_features].dropna()  # Dropping rows with missing values

# Convert categorical variables to numerical using One-Hot Encoding
cleaned_data = pd.get_dummies(cleaned_data, columns=['Fuel Type', 'Vehicle Size Class'])

# Prepare data for model
X = cleaned_data.drop('Combined Mpg For Fuel Type1', axis=1)  # Features
y = cleaned_data['Combined Mpg For Fuel Type1']  # Target variable

# Train Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

In [None]:
unique_fuel_types = data['Fuel Type'].unique()
print(unique_fuel_types)

In [None]:
# Function to recommend cars based on user input
def recommend_cars(city_mpg, highway_mpg, fuel_type, vehicle_size_class):
    # Ensure the 'Fuel Type' feature exists in the encoded columns
    fuel_types = ['Diesel', 'Regular', 'Premium', 'Gasoline or E85', 'CNG', 'Premium or E85',
                  'Electricity', 'Midgrade', 'Regular Gas and Electricity', 'Premium and Electricity',
                  'Premium Gas or Electricity', 'Gasoline or natural gas', 'Gasoline or propane',
                  'Regular Gas or Electricity']  # Add all fuel types seen at fit time
    for col in fuel_types:
        if 'Fuel Type_' + col not in X.columns:
            X['Fuel Type_' + col] = 0  # Add the new fuel type with zeros

    input_data = pd.DataFrame({
        'City Mpg For Fuel Type1': [city_mpg],
        'Highway Mpg For Fuel Type1': [highway_mpg],
    })

    # Ensure all possible fuel types are considered
    for col in fuel_types:
        input_data['Fuel Type_' + col] = 0

    # Update the fuel type value to 1 for the provided input
    input_data['Fuel Type_' + fuel_type] = 1

    # Use the model to predict combined mpg
    predicted_combined_mpg = model.predict(input_data)

    # Find top 5 cars based on predicted combined mpg
    recommended_cars = cleaned_data.copy()
    recommended_cars['Predicted Combined Mpg'] = model.predict(X)
    recommended_cars = recommended_cars.nlargest(5, 'Predicted Combined Mpg')

    return recommended_cars

# Example usage: recommending cars based on input parameters
recommended_cars = recommend_cars(city_mpg=25, highway_mpg=30, fuel_type='Regular Gasoline', vehicle_size_class='Midsize Cars')
print(recommended_cars)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the car data
data = pd.read_csv("car.csv")

# Define relevant features (choose based on your analysis)
features = ["Make", "Model", "City Mpg For Fuel Type1", "Combined Mpg For Fuel Type1", "Fuel Type"]

# Clean the data (handle missing values, outliers, etc.)
# Replace missing values with mean/median/mode based on data type and distribution
data[features].fillna(data[features].mean(), inplace=True)
# Convert categorical features to numerical if needed (e.g., one-hot encoding)
data["Make"] = data["Make"].astype("category").cat.codes
data["Model"] = data["Model"].astype("category").cat.codes
data["Fuel Type"] = data["Fuel Type"].astype("category").cat.codes

# Prepare data for model training
X = data[features]
y = data["Make"]  # Choose target variable based on your recommendation goal

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Build and train the Random Forest model
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

# Evaluate model performance (optional)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model accuracy: {accuracy:.4f}")

# Use the model for recommendations
# Define desired features for your recommended cars
desired_features = {"Make": "Honda", "City Mpg For Fuel Type1": 30, "Combined Mpg For Fuel Type1": 40, "Fuel Type": "Hybrid"}

# Filter data based on desired features
filtered_data = data[
    (data["Make"] == desired_features["Make"])
    & (data["City Mpg For Fuel Type1"] >= desired_features["City Mpg For Fuel Type1"])
    & (data["Combined Mpg For Fuel Type1"] >= desired_features["Combined Mpg For Fuel Type1"])
    & (data["Fuel Type"] == desired_features["Fuel Type"])
]

# Predict Make for the filtered data
filtered_data["Predicted_Make"] = model.predict(filtered_data[features])

# Recommend top 5 cars with all features
recommended_cars = filtered_data.sort_values("Predicted_Make").head(5)

# Print recommended cars with their features
for i, car in recommended_cars.iterrows():
    print(f"\nRecommended car #{i+1}:")
    for feature in features:
        print(f"\t-{feature}: {car[feature]}")



In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# ... other libraries as needed


In [2]:
data = pd.read_csv("car.csv")

In [3]:
features = ["Make", "Model", "Fuel Type", "City Mpg For Fuel Type1", "Combined Mpg For Fuel Type1"]  # Replace with your chosen features


In [5]:
# Prepare data for model training
X = data[features]
y = data["Make"]  # Choose target variable based on your recommendation goal

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [6]:
model = RandomForestClassifier()  # Adjust hyperparameters as needed
model.fit(X_train, y_train)


ValueError: could not convert string to float: 'American Motors Corporation'

In [None]:
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model accuracy: {accuracy:.4f}")


In [2]:
from flask import Flask, render_template, request
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

app = Flask(__name__, static_url_path='/static')

# Load and preprocess your data
data = pd.read_csv("car.csv")

# Columns to drop
columns_to_drop = [
    "Guzzler", "Transmission descriptor", "T Charger", "S Charger", "ATV Type", 
    "Fuel Type2", "Epa Range For Fuel Type2", "Electric motor", "MFR Code", 
    "c240Dscr", "C240B Dscr", "Start-Stop"
]

# Drop specified columns
data = data.drop(columns_to_drop, axis=1)

# Fill missing values in 'Engine displacement' with the mean
data["Engine displacement"].fillna(data["Engine displacement"].mean(), inplace=True)

# List of features
features = [
    "Annual Petroleum Consumption For Fuel Type1",
    "Highway Mpg For Fuel Type1",
    "Engine displacement",
    "Hatchback luggage volume",
    "EPA model type index"
    # Add more features as needed
]

# Define X (features) and y (target)
X = data[features]
y = data["Make"]

# Split data into train and test sets
# Adjust test_size and random_state as needed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [12]:
print(data.isnull().sum())

Make                                           0
Model                                          0
Annual Petroleum Consumption For Fuel Type1    0
Annual Petroleum Consumption For Fuel Type2    0
Time to charge at 120V                         0
                                              ..
Modified On                                    0
PHEV City                                      0
PHEV Highway                                   0
PHEV Combined                                  0
baseModel                                      0
Length: 72, dtype: int64


In [5]:
@app.route('/')
def index():
    return render_template('car.html')

@app.route('/get_recommendations', methods=['POST'])
def get_recommendations():
    if request.method == 'POST':
        user_preferences = {}
        for feature in features:
            user_value = request.form.get(feature)
            user_preferences[feature] = float(user_value) if user_value else None

        user_input = pd.DataFrame(user_preferences, index=[0])

        # Ensure that there are no missing values in the input
# Inside get_recommendations() function in app.py
        user_preferences = {}
        for feature in features:
            user_value = request.form.get(feature)
            if user_value:  # Check if the value is not empty or None
                user_preferences[feature] = float(user_value)
            else:
                # Handle missing values here, for instance, you can assign a default value
                user_preferences[feature] = 0.0  # Or any default value appropriate for your data



        predicted_make = model.predict(user_input)[0]
        recommended_cars = data[data['Make'] == predicted_make].sort_values(by=features, ascending=False).head(5)

        return render_template('recommendations.html', recommendations=recommended_cars)

@app.route('/home')
def home():
    return render_template('home.html')

@app.route('/about')
def about():
    return render_template('about.html')

@app.route('/login')
def login():
    return render_template('login.html')


if __name__ == "__main__":
    app.run(debug=True)


AssertionError: View function mapping is overwriting an existing endpoint function: index