<a href="https://colab.research.google.com/github/madiha-ahmed-chowdhury/ASRRO_Shared-Task-Hunt/blob/main/Linear_Regression_Car_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the raw training table; the path is the Colab upload location.
data = pd.read_csv(filepath_or_buffer='/content/train.csv')

# Task 1

**Data Selection**

In [None]:
# Levy uses '-' as a placeholder: map it to '0' first, then convert the column
# to numbers (any remaining unparseable text becomes NaN via errors='coerce').
data['Levy'] = pd.to_numeric(
    data['Levy'].replace(to_replace='-', value='0'),
    errors='coerce',
)

# Keep only non-diesel cars produced in 2005 or later.
filtered_data = data[data['Fuel type'].ne('Diesel') & data['Prod. year'].ge(2005)]
filtered_data.head(n=2)

In [None]:
# Show the last two rows of the filtered selection.
filtered_data.tail(n=2)

**Data Transformation**

In [None]:
# Average levy over the filtered rows: column total divided by the row count.
# NOTE(review): sum() skips NaN while the row count does not, so this differs
# from .mean() if any Levy value failed to parse — confirm that is intended.
average_levy = filtered_data['Levy'].sum() / filtered_data.shape[0]

# Task 2

**Data Preparation**

In [None]:
# Target (Price) plus the three numeric predictors used by the model.
selected_features = ['Price', 'Levy', 'Mileage', 'Prod. year']

# Drop incomplete rows, then strip the 'km' unit text so Mileage becomes an integer.
prepared_data = (
    filtered_data[selected_features]
    .dropna()
    .assign(Mileage=lambda frame: frame['Mileage'].str.replace('km', '').astype(int))
)
prepared_data.head(2)

In [None]:
# Hold out 10% of the prepared rows for testing; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(prepared_data, test_size=0.1, random_state=42)

# Separate the feature matrix (everything except Price) from the target vector.
X_train, y_train = train_data.drop(columns=['Price']).to_numpy(), train_data['Price'].to_numpy()
X_test, y_test = test_data.drop(columns=['Price']).to_numpy(), test_data['Price'].to_numpy()

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

**Training the Model**

In [None]:
class LinearRegressionGD:
    """Linear regression trained with full-batch gradient descent.

    Attributes are populated by ``fit``:
      - ``weights``: 1-D array with one coefficient per feature.
      - ``bias``: scalar intercept.
      - ``cost_history``: mean squared error recorded at every iteration
        (computed from the predictions made *before* that iteration's update).
    """

    def __init__(self, learning_rate, iterations):
        """Store the step size and the number of gradient-descent iterations."""
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None
        self.cost_history = []

    def fit(self, X, y):
        """Fit weights and bias on ``X`` (n_samples, n_features) and ``y``.

        ``X`` and ``y`` may be arrays or nested lists. ``y`` is flattened, so a
        column vector of shape (n_samples, 1) is accepted. Each call starts
        from zero weights and an empty ``cost_history``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the target: an (n, 1) column would otherwise broadcast
        # `y_pred - y` into an (n, n) matrix and corrupt the gradients.
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0
        # Reset so that refitting does not append to the previous run's costs.
        self.cost_history = []

        for _ in range(self.iterations):
            # Compute predictions and residuals with the current parameters
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            # Compute gradients
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Update weights and bias
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Record the mean squared error of the pre-update predictions
            self.cost_history.append(np.mean(error ** 2))

    def predict(self, X):
        """Return predictions ``X @ weights + bias`` for the rows of ``X``."""
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

In [None]:
# Gradient-descent hyperparameters.
learning_rate, iterations = 0.01, 1000

# Fit the model on the scaled training features.
model = LinearRegressionGD(learning_rate=learning_rate, iterations=iterations)
model.fit(X_train_scaled, y_train)

# Keep the learned parameters for reporting.
optimal_weights, optimal_bias = model.weights, model.bias

# Training cost recorded at each iteration.
plt.plot(range(iterations), model.cost_history)
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Training Cost vs. Iterations")
plt.grid()
plt.show()


In [None]:
# Predict on the scaled test features and score them with mean squared error.
y_test_pred = model.predict(X_test_scaled)
test_cost = np.square(y_test - y_test_pred).mean()

# Report the learned parameters alongside the test-set cost.
print("Optimal Weights:", optimal_weights)
print("Optimal Bias:", optimal_bias)
print("Test Set Cost:", test_cost)