In [2]:
# import libraries
import pandas as pd

In [3]:
# Load the dataset: read "cars.csv" (a relative path, so it is resolved against
# the kernel's working directory) into the DataFrame `cars`.
cars = pd.read_csv("cars.csv")

In [4]:
# Peek at the first five rows to sanity-check the load
cars.head(5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [5]:
# Print a structural summary of the frame: index range, column names, non-null
# counts, dtypes and memory usage. A non-null count lower than the number of
# entries means that column has missing values.
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
 8   name          398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [6]:
# Take an independent copy of the 'mpg' column as a Series, so that later
# operations on `mpg` cannot modify `cars`
mpg = cars["mpg"].copy()

In [8]:
# Summary statistics for mpg: count, mean, standard deviation, min, quartiles
# and max
mpg.describe()

count    398.000000
mean      23.514573
std        7.815984
min        9.000000
25%       17.500000
50%       23.000000
75%       29.000000
max       46.600000
Name: mpg, dtype: float64

In [9]:
# Largest value in the mpg Series (the same figure as the 'max' row of describe())
mpg.max()

46.6

In [11]:
# Number of occurrences of each distinct mpg value, most frequent first
mpg.value_counts()

13.0    20
14.0    19
18.0    17
15.0    16
26.0    14
        ..
34.2     1
38.1     1
37.2     1
32.1     1
32.7     1
Name: mpg, Length: 129, dtype: int64

In [12]:
# Relative frequency of each distinct mpg value, i.e. its count divided by the
# total number of values
mpg.value_counts(normalize=True)

13.0    0.050251
14.0    0.047739
18.0    0.042714
15.0    0.040201
26.0    0.035176
          ...   
34.2    0.002513
38.1    0.002513
37.2    0.002513
32.1    0.002513
32.7    0.002513
Name: mpg, Length: 129, dtype: float64

In [17]:
# Return the mpg values ordered from smallest to largest; this produces a new
# Series and leaves `mpg` itself in its original order
mpg.sort_values(ascending=True)

28      9.0
25     10.0
26     10.0
27     11.0
67     11.0
       ... 
326    43.4
394    44.0
325    44.3
329    44.6
322    46.6
Name: mpg, Length: 398, dtype: float64

In [18]:
# Return the mpg values ordered from largest to smallest; this produces a new
# Series and leaves `mpg` itself in its original order
mpg.sort_values(ascending=False)

322    46.6
329    44.6
325    44.3
394    44.0
326    43.4
       ... 
67     11.0
27     11.0
26     10.0
25     10.0
28      9.0
Name: mpg, Length: 398, dtype: float64

In [19]:
# Return mpg ordered by its index labels (a new Series; `mpg` is not modified)
mpg.sort_index()

0      18.0
1      15.0
2      18.0
3      16.0
4      17.0
       ... 
393    27.0
394    44.0
395    32.0
396    28.0
397    31.0
Name: mpg, Length: 398, dtype: float64

In [21]:
# Convert fuel economy from miles per (US) gallon to litres per 100 km.
# 235.21 is the rounded mpg <-> L/100 km conversion factor
# (100 * 3.785411784 / 1.609344 ~= 235.2146), so the result is litres per
# 100 km -- not litres per kilometre, as the original variable name implied.
MPG_TO_L_PER_100KM = 235.21
litres_per_100km = (MPG_TO_L_PER_100KM / mpg).round(2)

# Legacy alias so any later cell that still uses the old name keeps working.
litres_per_kilometre = litres_per_100km

# NOTE(review): the recorded output for this cell starts at index 322, which
# suggests `mpg` was sorted in place by a cell that is no longer in the
# notebook -- confirm with Restart Kernel -> Run All.
# view the first rows of the converted Series
litres_per_100km.head()

322    5.05
329    5.27
325    5.31
394    5.35
326    5.42
Name: mpg, dtype: float64

In [23]:
# Pull the 'origin' column out of `cars` as an independent Series
origin = cars["origin"].copy()

# Preview the first five entries of the new Series
origin.head(5)

0    usa
1    usa
2    usa
3    usa
4    usa
Name: origin, dtype: object

In [25]:
# Relative frequency of each distinct origin value (share of all rows)
origin.value_counts(normalize=True)

usa       0.625628
japan     0.198492
europe    0.175879
Name: origin, dtype: float64

In [26]:
# Inspect the row index of `cars`; no index column was specified when loading,
# so this is the default integer RangeIndex
cars.index

RangeIndex(start=0, stop=398, step=1)

In [27]:
# Use the 'name' column as the row index. set_index returns a new DataFrame;
# rebinding `cars` to it keeps the data flow explicit and avoids inplace=True,
# which silently mutates the frame that earlier cells displayed.
cars = cars.set_index("name")

In [29]:
# Check whether every index label is distinct; False means at least one label
# is repeated, so a label-based lookup can match more than one row
cars.index.is_unique

False

In [30]:
# Count how many times each index label occurs, most frequent first
cars.index.value_counts()

ford pinto            6
toyota corolla        5
ford maverick         5
vw rabbit             5
amc matador           5
                     ..
vw super beetle       1
volvo diesel          1
dodge d100            1
plymouth horizon 4    1
plymouth champ        1
Name: name, Length: 300, dtype: int64

In [32]:
# Rename the index itself (its header, not the row labels) to 'car_model';
# this assignment modifies the existing index object in place
cars.index.name = 'car_model'

In [33]:
# Confirm the index now carries the 'car_model' name
cars.head(5)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
car_model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70,usa
buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70,usa
plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70,usa
amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70,usa
ford torino,17.0,8,302.0,140.0,3449,10.5,70,usa


In [34]:
# Move the current index back into an ordinary column and restore the default
# integer index. reset_index returns a new DataFrame; rebinding `cars` to it
# avoids inplace=True and keeps the transformation explicit.
cars = cars.reset_index()

In [35]:
# Confirm the former index is back as a regular column
cars.head(5)

Unnamed: 0,car_model,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70,usa
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70,usa
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70,usa
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70,usa
4,ford torino,17.0,8,302.0,140.0,3449,10.5,70,usa


In [36]:
# List the column labels of `cars` as they stand after the index reset
cars.columns

Index(['car_model', 'mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin'],
      dtype='object')

In [37]:
# Shorten/clarify two column labels: 'horsepower' -> 'hp', 'origin' -> 'country'.
# rename returns a new DataFrame; rebinding `cars` to it avoids inplace=True
# and keeps the transformation explicit. Columns not named in the mapping are
# left unchanged.
cars = cars.rename(columns={"horsepower": "hp", "origin": "country"})

In [38]:
# Confirm the renamed columns ('hp', 'country') are in place
cars.head(5)

Unnamed: 0,car_model,mpg,cylinders,displacement,hp,weight,acceleration,model_year,country
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70,usa
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70,usa
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70,usa
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70,usa
4,ford torino,17.0,8,302.0,140.0,3449,10.5,70,usa
