### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap

### Loading Data

In [None]:
# Location of the vehicle listings CSV inside the Kaggle input directory.
vehicle_csv_path = "/kaggle/input/sri-lanka-vehicle-prices-dataset/vehicle_data.csv"
df = pd.read_csv(filepath_or_buffer=vehicle_csv_path)

### Exploratory Data Analysis

In [None]:
# Peek at the first rows (pandas shows five by default) to check that the CSV
# parsed into the expected columns.
df.head()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
### Converting Columns into appropriate data types
def extract_num(row):
    """Pull the digits out of a free-text cell and return them as one integer.

    Every decimal digit in ``row`` is kept, in order, and everything else
    (currency prefix, thousands separators, unit suffix, ...) is dropped, so
    ``"Rs 1,500,000"`` becomes ``1500000`` and ``"12,000 km"`` becomes
    ``12000``.

    Note that a decimal point is dropped like any other non-digit character,
    so ``"1.5"`` yields ``15``.

    Parameters
    ----------
    row : str, number or missing value
        A single cell value, typically supplied by ``Series.apply``.

    Returns
    -------
    int or float
        The concatenated digits as an ``int``. ``numpy.nan`` is returned when
        the cell is missing (``None``/``NaN``) or contains no digits at all,
        so one bad cell no longer aborts the whole ``apply``. A value that is
        already numeric is coerced with ``int()``.
    """
    if not isinstance(row, str):
        # Missing cells arrive as None/NaN rather than str; keep them missing.
        # Anything else is assumed to be a number already and is coerced.
        return np.nan if pd.isna(row) else int(row)

    # isdecimal() keeps only characters that int() can parse. isnumeric()
    # also accepts superscripts and fractions (e.g. the "³" in "cm³"), which
    # would make int() raise ValueError.
    digits = "".join(ch for ch in row if ch.isdecimal())

    # int("") raises ValueError, so a cell without digits is reported as NaN.
    return int(digits) if digits else np.nan

# Raw text column -> unit-annotated name it carries once it has been made numeric.
unit_columns = {
    "Price": "Price (Rs)",
    "Mileage": "Mileage (km)",
    "Capacity": "Capacity (cc)",
}

# Convert only the columns that still carry their raw name. After the rename
# below the raw names are gone, so re-running this cell skips the conversion
# instead of failing with KeyError: 'Price'.
for raw_name in unit_columns:
    if raw_name in df.columns:
        df[raw_name] = df[raw_name].apply(extract_num)

# rename() ignores keys that are not present, so this is also safe on a re-run.
df = df.rename(columns=unit_columns)

In [None]:
# Summary statistics for the numeric columns, which at this point include the
# Price (Rs), Mileage (km) and Capacity (cc) columns converted above.
df.describe()

In [None]:
### Obtaining the Districts from given location

# The district is taken to be whatever follows the last ", " in Location; a
# value without that separator is used unchanged.
# The vectorised .str accessor propagates missing locations as NaN, whereas a
# Python-level x.split(...) inside apply() raises AttributeError on them.
df["District"] = df["Location"].str.rsplit(", ", n=1).str[-1]

### Data Visualization

In [None]:
### Plotting top 5 Sellers who have posted the most ads

# Ads per seller: count the non-null titles in each seller's group and keep
# the five sellers with the highest counts.
top_sellers = (
    df.groupby("Seller_name")["Title"]
    .count()
    .sort_values(ascending=False)
    .head(5)
)

# sns.barplot returns a matplotlib Axes; the `fig` name is kept to match the
# other plotting cells.
fig = sns.barplot(x=top_sellers.index, y=top_sellers.values)

# Rotate the tick labels seaborn already placed instead of overwriting them
# with set_xticklabels, which can mislabel bars when tick positions are not fixed.
fig.tick_params(axis="x", labelrotation=45)
fig.set_ylabel("Number of Posted Ads")
fig.set_xlabel("Seller Name")
fig.set_title("Top 5 Sellers who have posted the most ads");

In [None]:
### Plotting top 5 Most Available Car Brands to Buy

# Listings per brand: count the non-null titles in each brand's group and keep
# the five brands with the highest counts.
top_brands = (
    df.groupby("Brand")["Title"]
    .count()
    .sort_values(ascending=False)
    .head(5)
)

# sns.barplot returns a matplotlib Axes; the `fig` name is kept to match the
# other plotting cells.
fig = sns.barplot(x=top_brands.index, y=top_brands.values)

# Rotate the tick labels seaborn already placed instead of overwriting them
# with set_xticklabels, which can mislabel bars when tick positions are not fixed.
fig.tick_params(axis="x", labelrotation=45)
fig.set_ylabel("Number of Cars For Sale")
fig.set_xlabel("Car Brand")
fig.set_title("Top 5 Most Available Car Brands to Buy");

In [None]:
# Average Price of Cars over the Years

# lineplot aggregates the rows that share a Year (mean by default), so the
# line traces the average price per year.
fig = sns.lineplot(x="Year", y="Price (Rs)", data=df)
fig.set(title="Average Price of Cars Over the Years");

In [None]:
# Correlation of Numeric Features in the Dataset
# Correlate the numeric columns only. The frame still holds text columns
# (e.g. Title, Location), and DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0, where numeric_only defaults to False. Selecting the numeric
# dtypes explicitly behaves the same on every pandas version.
numeric_corr = df.select_dtypes(include="number").corr()

fig = sns.heatmap(numeric_corr)
fig.set_title("Correlation of Numeric Features");

In [None]:
### 3D plot for Capacity, Price and Year.

# Create the figure through pyplot and attach the 3D axes to it explicitly.
# A bare Figure() is not managed by pyplot and was never drawn on; plt.axes()
# then created its axes on a separate, implicit figure.
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

# One point per listing, coloured by its price (the same values as the z axis).
ax.scatter3D(df["Year"], df["Capacity (cc)"], df["Price (Rs)"], c=df["Price (Rs)"])
ax.set_xlabel("Year")
ax.set_ylabel("Capacity (cc)")
ax.set_zlabel("Price (Rs)");