# Pandas
Read "10 minutes to Pandas": https://pandas.pydata.org/docs/user_guide/10min.html before solving the exercises.
We will use the data set "cars_data" in the exercises below. 

In [None]:
# Importing Pandas. 
import numpy as np
import pandas as pd

### Explain what a CSV file is.

A CSV file is simple text file which store tabular data such as spreadsheet or database. CSV file data represents a line of the table in row and each value separeted with semicolon in row. First row often contains the header and names.

### Load the data set "cars_data" through Pandas. 

In [None]:
# When reading in the data, either you have the data file in the same folder as your python script
# or in a seperate folder.

# Code below can be ran if you have the data file in the same folder as the script
# cars = pd.read_csv("cars_data.csv")

# Code below can be ran if you have the data file in another script. 
# Notice, you must change the path according to where you have the data in your computer. 
# pd.read_csv(r'C:\Users\Antonio Prgomet\Documents\ec_utbildning\kursframstallning\ds23\python_stat\exercises\numpy_matplot_pandas\cars_data.csv')

In [6]:
import pandas as pd
cars = pd.read_csv("cars_data.csv")


### Print the first 10 rows of the data. 

In [8]:
print(cars.head(10))

    index      company   body-style  wheel-base  length engine-type  \
0       0  alfa-romero  convertible        88.6   168.8        dohc   
1       1  alfa-romero  convertible        88.6   168.8        dohc   
2       2  alfa-romero    hatchback        94.5   171.2        ohcv   
3       3         audi        sedan        99.8   176.6         ohc   
4       4         audi        sedan        99.4   176.6         ohc   
5       5         audi        sedan        99.8   177.3         ohc   
6       6         audi        wagon       105.8   192.7         ohc   
7       9          bmw        sedan       101.2   176.8         ohc   
8      10          bmw        sedan       101.2   176.8         ohc   
9      11          bmw        sedan       101.2   176.8         ohc   
10     13          bmw        sedan       103.5   189.0         ohc   
11     14          bmw        sedan       103.5   193.8         ohc   
12     15          bmw        sedan       110.0   197.0         ohc   
13    

### Print the last 5 rows. 

In [5]:
cars = pd.read_csv("cars_data.csv")
print(cars.tail(5))

    index     company body-style  wheel-base  length engine-type  \
56     81  volkswagen      sedan        97.3   171.7         ohc   
57     82  volkswagen      sedan        97.3   171.7         ohc   
58     86  volkswagen      sedan        97.3   171.7         ohc   
59     87       volvo      sedan       104.3   188.8         ohc   
60     88       volvo      wagon       104.3   188.8         ohc   

   num-of-cylinders  horsepower  average-mileage    price  
56             four          85               27   7975.0  
57             four          52               37   7995.0  
58             four         100               26   9995.0  
59             four         114               23  12940.0  
60             four         114               23  13415.0  


### By using the info method, check how many non-null rows each column have. 

In [10]:
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   index             61 non-null     int64  
 1   company           61 non-null     object 
 2   body-style        61 non-null     object 
 3   wheel-base        61 non-null     float64
 4   length            61 non-null     float64
 5   engine-type       61 non-null     object 
 6   num-of-cylinders  61 non-null     object 
 7   horsepower        61 non-null     int64  
 8   average-mileage   61 non-null     int64  
 9   price             58 non-null     float64
dtypes: float64(3), int64(3), object(4)
memory usage: 4.9+ KB


### If any column has a missing value, drop the entire row. Notice, the operation should be inplace meaning you change the dataframe itself.

In [7]:
cars.dropna(inplace=True)
cars.info()


### Calculate the mean of each numeric column. 

In [28]:
select_dtype = cars.select_dtypes(include = ["number"])
print(select_dtype.mean())


index                 40.885246
wheel-base            98.481967
length               173.098361
horsepower           107.852459
average-mileage       25.803279
price              15387.000000
dtype: float64


### Select the rows where the column "company" is equal to 'honda'. 

In [25]:
print(cars[cars["company"]== "honda"])

    index company body-style  wheel-base  length engine-type num-of-cylinders  \
18     27   honda      wagon        96.5   157.1         ohc             four   
19     28   honda      sedan        96.5   175.4         ohc             four   
20     29   honda      sedan        96.5   169.1         ohc             four   

    horsepower  average-mileage    price  
18          76               30   7295.0  
19         101               24  12945.0  
20         100               25  10345.0  


### Sort the data set by price in descending order. This should *not* be an inplace operation. 

In [36]:
print(cars.sort_values(by = "price", ascending=False))

    index        company   body-style  wheel-base  length engine-type  \
35     47  mercedes-benz      hardtop       112.0   199.2        ohcv   
11     14            bmw        sedan       103.5   193.8         ohc   
34     46  mercedes-benz        sedan       120.9   208.1        ohcv   
46     62        porsche  convertible        89.5   168.9        ohcf   
12     15            bmw        sedan       110.0   197.0         ohc   
..    ...            ...          ...         ...     ...         ...   
27     36          mazda    hatchback        93.1   159.1         ohc   
13     16      chevrolet    hatchback        88.4   141.1           l   
22     31          isuzu        sedan        94.5   155.9         ohc   
23     32          isuzu        sedan        94.5   155.9         ohc   
47     63        porsche    hatchback        98.4   175.7       dohcv   

   num-of-cylinders  horsepower  average-mileage    price  
35            eight         184               14  45400.0  
11 

### Select the rows where the column "company" is equal to any of the values (audi, bmw, porsche).

In [37]:
print(cars[cars["company"].isin(["audi","bmw","porsche"])])

    index  company   body-style  wheel-base  length engine-type  \
3       3     audi        sedan        99.8   176.6         ohc   
4       4     audi        sedan        99.4   176.6         ohc   
5       5     audi        sedan        99.8   177.3         ohc   
6       6     audi        wagon       105.8   192.7         ohc   
7       9      bmw        sedan       101.2   176.8         ohc   
8      10      bmw        sedan       101.2   176.8         ohc   
9      11      bmw        sedan       101.2   176.8         ohc   
10     13      bmw        sedan       103.5   189.0         ohc   
11     14      bmw        sedan       103.5   193.8         ohc   
12     15      bmw        sedan       110.0   197.0         ohc   
45     61  porsche      hardtop        89.5   168.9        ohcf   
46     62  porsche  convertible        89.5   168.9        ohcf   
47     63  porsche    hatchback        98.4   175.7       dohcv   

   num-of-cylinders  horsepower  average-mileage    price  
3

### Find the number of cars (rows) for each company. 

In [43]:
print(cars.groupby('company').size())


company
alfa-romero      3
audi             4
bmw              6
chevrolet        3
dodge            2
honda            3
isuzu            3
jaguar           3
mazda            5
mercedes-benz    4
mitsubishi       4
nissan           5
porsche          3
toyota           7
volkswagen       4
volvo            2
dtype: int64


### Find the maximum price for each company. 

In [44]:
print(cars.groupby('company').max())

               index body-style  wheel-base  length engine-type  \
company                                                           
alfa-romero        2  hatchback        94.5   171.2        ohcv   
audi               6      wagon       105.8   192.7         ohc   
bmw               15      sedan       110.0   197.0         ohc   
chevrolet         18      sedan        94.5   158.8         ohc   
dodge             20  hatchback        93.7   157.3         ohc   
honda             29      wagon        96.5   175.4         ohc   
isuzu             32      sedan        94.5   170.7         ohc   
jaguar            35      sedan       113.0   199.6        ohcv   
mazda             43      sedan       104.9   175.0       rotor   
mercedes-benz     47      wagon       120.9   208.1        ohcv   
mitsubishi        52      sedan        96.3   172.4         ohc   
nissan            57      wagon       100.4   184.6        ohcv   
porsche           63  hatchback        98.4   175.7        ohc