In [1]:
# Load the EV energy-consumption dataset into `df`.
from pathlib import Path

import pandas as pd

# Build the path from the current user's home directory instead of hard-coding
# "/Users/<name>/...", so the notebook is not tied to one account or machine.
# Path.home() is the folder that "~" refers to in a shell.
DATA_PATH = Path.home() / "Documents" / "EV_Energy_Consumption_Dataset.csv"
df = pd.read_csv(DATA_PATH)

# Verify the file loaded correctly by printing the first 5 rows
print(df.head())

   Vehicle_ID            Timestamp   Speed_kmh  Acceleration_ms2  \
0        1102  2024-01-01 00:00:00  111.507366         -2.773816   
1        1435  2024-01-01 00:01:00   48.612323         -0.796982   
2        1860  2024-01-01 00:02:00  108.733320          0.253800   
3        1270  2024-01-01 00:03:00   38.579484         -2.111395   
4        1106  2024-01-01 00:04:00   57.172438          1.477883   

   Battery_State_%  Battery_Voltage_V  Battery_Temperature_C  Driving_Mode  \
0        30.415148         378.091525              25.314786             2   
1        97.385534         392.718377              18.240755             1   
2        84.912600         398.993495              44.449145             1   
3        28.777904         358.128273              28.980155             1   
4        29.740160         310.888162              33.184551             2   

   Road_Type  Traffic_Condition   Slope_%  Weather_Condition  Temperature_C  \
0          1                  1  6.879446  

In [2]:
# Print a structural summary of `df`: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Vehicle_ID              5000 non-null   int64  
 1   Timestamp               5000 non-null   object 
 2   Speed_kmh               5000 non-null   float64
 3   Acceleration_ms2        5000 non-null   float64
 4   Battery_State_%         5000 non-null   float64
 5   Battery_Voltage_V       5000 non-null   float64
 6   Battery_Temperature_C   5000 non-null   float64
 7   Driving_Mode            5000 non-null   int64  
 8   Road_Type               5000 non-null   int64  
 9   Traffic_Condition       5000 non-null   int64  
 10  Slope_%                 5000 non-null   float64
 11  Weather_Condition       5000 non-null   int64  
 12  Temperature_C           5000 non-null   float64
 13  Humidity_%              5000 non-null   float64
 14  Wind_Speed_ms           5000 non-null   

In [3]:
# Print a labelled banner, then the structural summary of `df`
# (column names, non-null counts and dtypes).
summary_banner = "--- Data Summary (df.info()) ---"
print(summary_banner)
df.info()

--- Data Summary (df.info()) ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Vehicle_ID              5000 non-null   int64  
 1   Timestamp               5000 non-null   object 
 2   Speed_kmh               5000 non-null   float64
 3   Acceleration_ms2        5000 non-null   float64
 4   Battery_State_%         5000 non-null   float64
 5   Battery_Voltage_V       5000 non-null   float64
 6   Battery_Temperature_C   5000 non-null   float64
 7   Driving_Mode            5000 non-null   int64  
 8   Road_Type               5000 non-null   int64  
 9   Traffic_Condition       5000 non-null   int64  
 10  Slope_%                 5000 non-null   float64
 11  Weather_Condition       5000 non-null   int64  
 12  Temperature_C           5000 non-null   float64
 13  Humidity_%              5000 non-null   float64
 14  Wind_Sp

In [4]:
# --- Data Preprocessing ---

# Columns that hold integer codes standing for categories rather than quantities.
categorical_cols = ['Driving_Mode', 'Road_Type', 'Traffic_Condition', 'Weather_Condition']

# Remove the two identifier/time columns; they are not used as model inputs.
df_processed = df.drop(columns=['Timestamp', 'Vehicle_ID'])
print("Dropped Timestamp and Vehicle_ID columns.")

# One-hot encode the categorical codes. drop_first=True omits the first level
# of each category, so k levels become k-1 indicator columns.
df_processed = pd.get_dummies(
    df_processed,
    columns=categorical_cols,
    prefix=categorical_cols,
    drop_first=True,
)
print("Performed one-hot encoding on categorical columns.")

# Preview the encoded frame.
print("\n--- Processed DataFrame (df_processed.head()) ---")
print(df_processed.head())

# List the resulting column index, including the new indicator columns.
print("\nNew columns:", df_processed.columns)

Dropped Timestamp and Vehicle_ID columns.
Performed one-hot encoding on categorical columns.

--- Processed DataFrame (df_processed.head()) ---
    Speed_kmh  Acceleration_ms2  Battery_State_%  Battery_Voltage_V  \
0  111.507366         -2.773816        30.415148         378.091525   
1   48.612323         -0.796982        97.385534         392.718377   
2  108.733320          0.253800        84.912600         398.993495   
3   38.579484         -2.111395        28.777904         358.128273   
4   57.172438          1.477883        29.740160         310.888162   

   Battery_Temperature_C   Slope_%  Temperature_C  Humidity_%  Wind_Speed_ms  \
0              25.314786  6.879446       0.741770   42.172533       7.829253   
1              18.240755 -3.007212      -3.495516   57.018427       4.495572   
2              44.449145  0.029585       9.248275   69.028911       5.144489   
3              28.980155  8.271943       2.868409   86.638349       4.518283   
4              33.184551  2.7

In [5]:
import altair as alt
import pandas as pd  # Make sure pandas is imported if you restarted the kernel

# If df_processed isn't defined anymore (e.g., after a kernel restart), re-run
# the data-loading and preprocessing cells earlier in this notebook first.


# --- EDA Visualization: Speed vs. Energy ---

# By default Altair serialises every column of the frame it receives into the
# chart specification. Only three columns are encoded here, so pass just those:
# the saved JSON stays small and none of the unused columns are written to disk.
chart_columns = ['Speed_kmh', 'Temperature_C', 'Energy_Consumption_kWh']
chart_data = df_processed[chart_columns]

# Scatter plot of energy consumption against speed.
speed_chart = alt.Chart(chart_data).mark_circle(opacity=0.5).encode(
    x=alt.X('Speed_kmh', title='Speed (km/h)'),
    y=alt.Y('Energy_Consumption_kWh', title='Energy Consumption (kWh)'),
    tooltip=chart_columns,  # Show these values on hover
).properties(
    title='Energy Consumption vs. Speed'
).interactive()  # Allow zooming and panning

# Save the chart specification as a JSON file
speed_chart.save('energy_vs_speed_scatterplot.json')

print("Scatter plot 'energy_vs_speed_scatterplot.json' saved successfully.")

Scatter plot 'energy_vs_speed_scatterplot.json' saved successfully.


In [6]:
# --- Data Preprocessing ---

# Start again from the raw frame, leaving out the identifier/time columns.
df_processed = df.drop(columns=['Timestamp', 'Vehicle_ID'])
print("Dropped Timestamp and Vehicle_ID columns.")

# Integer-coded columns that are treated as categories.
categorical_cols = ['Driving_Mode', 'Road_Type', 'Traffic_Condition', 'Weather_Condition']

# Expand each categorical column into indicator columns (first level dropped).
encoding_options = dict(columns=categorical_cols, prefix=categorical_cols, drop_first=True)
df_processed = pd.get_dummies(df_processed, **encoding_options)
print("Performed one-hot encoding on categorical columns.")

# Preview the first 5 rows of the encoded frame.
print("\n--- Processed DataFrame (df_processed.head()) ---")
print(df_processed.head())

Dropped Timestamp and Vehicle_ID columns.
Performed one-hot encoding on categorical columns.

--- Processed DataFrame (df_processed.head()) ---
    Speed_kmh  Acceleration_ms2  Battery_State_%  Battery_Voltage_V  \
0  111.507366         -2.773816        30.415148         378.091525   
1   48.612323         -0.796982        97.385534         392.718377   
2  108.733320          0.253800        84.912600         398.993495   
3   38.579484         -2.111395        28.777904         358.128273   
4   57.172438          1.477883        29.740160         310.888162   

   Battery_Temperature_C   Slope_%  Temperature_C  Humidity_%  Wind_Speed_ms  \
0              25.314786  6.879446       0.741770   42.172533       7.829253   
1              18.240755 -3.007212      -3.495516   57.018427       4.495572   
2              44.449145  0.029585       9.248275   69.028911       5.144489   
3              28.980155  8.271943       2.868409   86.638349       4.518283   
4              33.184551  2.7

In [7]:
# --- Data Preprocessing ---
# Drop the identifier/time columns, then one-hot encode the integer-coded
# categorical columns (first level of each dropped), as a single chain.
categorical_cols = ['Driving_Mode', 'Road_Type', 'Traffic_Condition', 'Weather_Condition']
df_processed = (
    df.drop(columns=['Timestamp', 'Vehicle_ID'])
    .pipe(pd.get_dummies, columns=categorical_cols, prefix=categorical_cols, drop_first=True)
)
print(df_processed.head())

    Speed_kmh  Acceleration_ms2  Battery_State_%  Battery_Voltage_V  \
0  111.507366         -2.773816        30.415148         378.091525   
1   48.612323         -0.796982        97.385534         392.718377   
2  108.733320          0.253800        84.912600         398.993495   
3   38.579484         -2.111395        28.777904         358.128273   
4   57.172438          1.477883        29.740160         310.888162   

   Battery_Temperature_C   Slope_%  Temperature_C  Humidity_%  Wind_Speed_ms  \
0              25.314786  6.879446       0.741770   42.172533       7.829253   
1              18.240755 -3.007212      -3.495516   57.018427       4.495572   
2              44.449145  0.029585       9.248275   69.028911       5.144489   
3              28.980155  8.271943       2.868409   86.638349       4.518283   
4              33.184551  2.776814      16.750244   27.189185       4.263406   

   Tire_Pressure_psi  ...  Energy_Consumption_kWh  Driving_Mode_2  \
0          31.112020  .

In [8]:
# --- Task 5: Build Model - Step A: Separate Features and Target ---
target_column = 'Energy_Consumption_kWh'

# X keeps every column except the target; y is the target column on its own.
X = df_processed.drop(columns=[target_column])
y = df_processed[target_column]

print(X.head())

    Speed_kmh  Acceleration_ms2  Battery_State_%  Battery_Voltage_V  \
0  111.507366         -2.773816        30.415148         378.091525   
1   48.612323         -0.796982        97.385534         392.718377   
2  108.733320          0.253800        84.912600         398.993495   
3   38.579484         -2.111395        28.777904         358.128273   
4   57.172438          1.477883        29.740160         310.888162   

   Battery_Temperature_C   Slope_%  Temperature_C  Humidity_%  Wind_Speed_ms  \
0              25.314786  6.879446       0.741770   42.172533       7.829253   
1              18.240755 -3.007212      -3.495516   57.018427       4.495572   
2              44.449145  0.029585       9.248275   69.028911       5.144489   
3              28.980155  8.271943       2.868409   86.638349       4.518283   
4              33.184551  2.776814      16.750244   27.189185       4.263406   

   Tire_Pressure_psi  ...  Distance_Travelled_km  Driving_Mode_2  \
0          31.112020  ..

In [9]:
# --- Task 5: Build Model - Step B: Split Data ---

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# X and y must already exist in this kernel. A NameError here means the cell
# that defines them has not been run yet.

# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Data split complete.")
print("Training set has:", len(X_train), "samples")
print("Testing set has:", len(X_test), "samples")


# --- Task 5: Build Model - Step C: Train Model ---

# Instantiate an ordinary least-squares regressor and fit it on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

print("\nModel training complete.")


# --- Task 6: Evaluate the Model ---

# Predict on the held-out rows, then score the predictions.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)              # goal: > 0.80
mae = mean_absolute_error(y_test, y_pred)  # lower is better

print("\n--- Model Evaluation ---")
print(f"R-squared (R²): {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f} kWh")

# Report whether the R-squared goal was reached.
goal_message = (
    "\nCongratulations! You met your R-squared goal of > 0.80."
    if r2 > 0.80
    else "\nGood first model! The R-squared is below 0.80, but we can improve this."
)
print(goal_message)

Data split complete.
Training set has: 4000 samples
Testing set has: 1000 samples

Model training complete.

--- Model Evaluation ---
R-squared (R²): 0.9461
Mean Absolute Error (MAE): 0.4106 kWh

Congratulations! You met your R-squared goal of > 0.80.


In [10]:
import getpass
import os

import google.generativeai as genai

# --- Task 3: Test Gen AI API (Corrected) ---

# SECURITY: never paste an API key into the notebook. It would be stored in
# the saved file and in anything the file is shared through. The key is read
# from the GOOGLE_API_KEY environment variable, falling back to a hidden
# interactive prompt.
# NOTE(review): the key that used to be hard-coded in this cell has been
# exposed and should be revoked and replaced.
API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Google API key: ")
genai.configure(api_key=API_KEY)

# Use a valid model name FROM YOUR LIST
model_name = "models/gemini-pro-latest"

print(f"Using model: {model_name}")
print("Sending test prompt to Gemini...")

try:
    # Keep the Gemini handle under its own name: binding it to `model` would
    # overwrite the trained regressor that other cells keep in `model`.
    gemini_model = genai.GenerativeModel(model_name)

    # Send the prompt
    response = gemini_model.generate_content("What is the main benefit of an electric vehicle?")

    # Print the response from the AI
    print("\n--- Gemini AI Response ---")
    print(response.text)

except Exception as e:
    # Deliberately broad: this is a connectivity smoke test, so any failure
    # (bad key, unknown model, network error) is reported rather than raised.
    print(f"\nAn error occurred: {e}")

Using model: models/gemini-pro-latest
Sending test prompt to Gemini...

--- Gemini AI Response ---
Excellent question. While there are several major advantages, the single most significant and commonly cited main benefit of an electric vehicle (EV) is its **zero tailpipe emissions**.

This primary benefit leads to two crucial positive impacts:

1.  **Environmental Benefits:** By not burning gasoline or diesel, EVs produce no carbon dioxide (CO2), nitrogen oxides (NOx), or particulate matter from the tailpipe. This directly helps to combat climate change and reduces the vehicle's overall carbon footprint over its lifetime, especially when charged with renewable energy.

2.  **Public Health Benefits:** The elimination of tailpipe emissions dramatically improves local air quality. This is particularly important in dense urban areas, where vehicle pollution is a major contributor to respiratory illnesses, asthma, and other health problems. Cleaner air means a healthier population.

---

##

In [11]:
# Bind `model` to a new LinearRegression fitted on the training data.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [12]:
import joblib

# --- Task 1 (Week 3): Save the Model ---

# Write the trained model to disk with joblib. '.pkl' (pickle) is a common
# extension for saved models.
model_file = 'model.pkl'
joblib.dump(model, model_file)

print("Model saved successfully as 'model.pkl'")

Model saved successfully as 'model.pkl'


In [13]:
import joblib

# Save the feature column names of X, in order, as a plain Python list.
feature_names = X.columns.tolist()
joblib.dump(feature_names, 'model_columns.pkl')

print("Model columns saved successfully as 'model_columns.pkl'")

Model columns saved successfully as 'model_columns.pkl'
