# Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn

## Q1 - Pandas Version

In [2]:
pd.__version__

'2.3.2'

# Import dataset

In [5]:
import requests

url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"
r = requests.get(url)
with open("car_fuel_efficiency.csv", "wb") as f:
    f.write(r.content)

In [6]:
car_df = pd.read_csv("car_fuel_efficiency.csv")

In [9]:
car_df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


## Q2 - Number of rows/entries in dataset

In [8]:
car_row_count = len(car_df)

print("Total rows in the car dataset: ", car_row_count)

Total rows in the car dataset:  9704


## Q3 - Number of distinct fuel types

In [12]:
car_fuel_col = car_df[["fuel_type"]]

unique_fuel_types = car_fuel_col.nunique()

print('Unique fuel type count:', unique_fuel_types)

Unique fuel type count: fuel_type    2
dtype: int64


## Q4 - Number of Cols With Missing Values

In [13]:
missing_counts = car_df.isnull().sum()
print(missing_counts)

engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64


In [14]:
num_columns_with_missing = car_df.isnull().any().sum()
print("Number of columns with missing values:", num_columns_with_missing)

Number of columns with missing values: 4


## Q5 - Cars MAX Fuel Efficiency for Cars from Origin = Asia

In [23]:
unique_origins = car_df['origin'].unique()
print(unique_origins)

['Europe' 'USA' 'Asia']


In [None]:
cars_asia = car_df[car_df['origin'] == 'Asia']
print(cars_asia['origin'].unique())

['Asia']


In [33]:
max_asia_car_fuel_efficiency = cars_asia['fuel_efficiency_mpg'].max()
print('MAX fuel efficiency for cars from asia ONLY: ', max_asia_car_fuel_efficiency)

MAX fuel efficiency for cars from asia ONLY:  23.759122836520497


## Q6 - Median Horsepower

In [40]:
horsepower_col = car_df[["horsepower"]]
median_horsepower = horsepower_col.median()

print('Median horsepower: ', median_horsepower)

Median horsepower:  horsepower    149.0
dtype: float64


In [45]:
horsepower_mode = car_df["horsepower"].mode()[0]
#print(horsepower_mode)

missing_before = car_df['horsepower'].isnull().sum()
print("Missing values before fill:", missing_before)

#fillna all missing values with the mode
car_df["horsepower"] = car_df["horsepower"].fillna(horsepower_mode)

# Count missing values after fill
missing_after = car_df['horsepower'].isnull().sum()
print("Missing values after fill:", missing_after)

median_ffill_horsepower = car_df["horsepower"].median()
print('NEW MEDIAN after ffill: ', median_ffill_horsepower)


Missing values before fill: 0
Missing values after fill: 0
NEW MEDIAN after ffill:  152.0


## Q7 - Sum of Car Weights ASIA Only

In [46]:
print(cars_asia['origin'].unique())

['Asia']


In [47]:
asia_specific_cols = cars_asia[['vehicle_weight', 'model_year']]
asia_specific_cols.head()

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016


In [49]:
# 1. select the first 7 entries
asia_first_seven = asia_specific_cols.head(7)

# 2. get the underlying Numpy array X
X = asia_first_seven.values

# 3. Compute matrix-matrix multiplication between the transpose of X and X. To get the transpose, use X.T. Let's call the result XTX.
XTX = np.dot(X.T, X)

# 4. Invert XTX
XTX_inv = np.linalg.inv(XTX)

# 5. Create the array y with specific values
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# 6. Multiply the inverse of XTX with transpose of X, then multiply result by y
w = XTX_inv.dot(X.T).dot(y)

# 7. Sum all elements of w
result_sum = np.sum(w)

print("Sum of all elements:", result_sum)

Sum of all elements: 0.5187709081074016
