In [26]:
import pandas as pd

### DataTypes - Series - 1 Dimensional

In [27]:
# A Series is pandas' one-dimensional labelled array; with no explicit index
# the values are labelled 0, 1, 2, ...
car_brand_series = pd.Series(data=["BMW", "Toyoto", "Honda"])

In [28]:
# A bare expression on the last line of a cell displays its value:
# here the brand names next to their integer index labels.
car_brand_series

0       BMW
1    Toyoto
2     Honda
dtype: object

In [29]:
# A second one-dimensional Series, holding one colour name per position.
colour_type_series = pd.Series(data=["Red", "Yellow", "Black"])

In [30]:
# Display the colour Series (values alongside their integer index labels).
colour_type_series

0       Red
1    Yellow
2     Black
dtype: object

### DataTypes - DataFrame - 2 Dimensional

In [31]:
# A DataFrame is pandas' two-dimensional table. Each dictionary key becomes a
# column name and the Series mapped to it supplies that column's values.
car_data = pd.DataFrame(
    data={
        "Brand": car_brand_series,
        "Colour": colour_type_series,
    }
)

In [32]:
# Display the DataFrame: one row per index label, one column per dictionary key.
car_data

Unnamed: 0,Brand,Colour
0,BMW,Red
1,Toyoto,Yellow
2,Honda,Black


### Import Data
- CSV
    - rows run along axis 0
    - columns run along axis 1

In [33]:
# Read the CSV file (path is relative to the notebook's working directory)
# into a DataFrame, then display it as the cell's output.
car_sales = pd.read_csv('data/car-sales.csv')
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


### Export Data

In [34]:
# Method call: write the DataFrame to a new CSV file.
# index=False leaves the row index out of the file, so reading it back
# does not produce an extra unnamed index column.
car_sales.to_csv('data/car-sales-op.csv', index=False)

### Describe Data

In [35]:
# Attribute (no parentheses): the data type of each column.
# Text columns are reported as `object`; Price is `object` too because its
# values are strings such as "$4,000.00", not numbers.
car_sales.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [36]:
# Attribute: the column labels, returned as a pandas Index.
car_sales.columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [37]:
# Attribute: the row index. A frame read without an explicit index gets a
# RangeIndex that counts from 0 up to (but not including) the number of rows.
car_sales.index

RangeIndex(start=0, stop=10, step=1)

In [38]:
# Method: summary statistics (count, mean, std, min, quartiles, max).
# Only the numeric columns are summarised here; the text columns are left out.
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [40]:
# Method: prints a concise overview - index range, each column's non-null
# count and dtype, and the frame's memory usage.
car_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 532.0+ bytes


In [44]:
# Built-in len() on a DataFrame returns its number of rows.
len(car_sales)

10

In [39]:
# Method: column-wise mean. numeric_only=True restricts the calculation to
# numeric columns, skipping text columns such as Make, Colour and Price.
car_sales.mean(numeric_only=True)

Odometer (KM)    78601.4
Doors                4.0
dtype: float64

In [43]:
# mean() on a single Series returns one scalar: the average of its values.
age = pd.Series(data=list(range(10, 60, 10)))
age.mean()

30.0

### Viewing/Selecting Data

In [70]:
# Select a single column by name; the result is a Series.
# Attribute access (car_sales.Make) is equivalent, but it cannot be used for
# column names that contain spaces, such as "Odometer (KM)".
car_sales["Make"]

0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object

In [72]:
# Boolean filtering: the inner comparison yields a True/False Series, and
# indexing with it keeps only the rows whose odometer reading exceeds 100000.
car_sales[car_sales['Odometer (KM)'] > 100000]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [46]:
# head() with no argument returns the first 5 rows.
car_sales.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [47]:
# head(n) returns the first n rows; n=10 covers every row of this 10-row frame.
car_sales.head(10)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [49]:
# Bottom rows: tail() with no argument returns the last 5 rows.
car_sales.tail()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [62]:
# Sample Series for comparing .loc and .iloc. The index is deliberately out of
# order and repeats the label 2, so label-based and position-based lookups
# give visibly different results.
animals = pd.Series(
    data=["Cat", "Dog", "Mouse", "Monkey", "Pig", "Sparrow"],
    index=[0, 8, 2, 3, 2, 5],
)

In [55]:
# Display the Series: the left-hand column shows the custom index labels,
# not the row positions.
animals

0        Cat
8        Dog
2      Mouse
3     Monkey
2        Pig
5    Sparrow
dtype: object

In [63]:
# .loc selects by index *label*, not by position.
# The label 2 occurs twice in this index, so both matching entries come back.
animals.loc[2]

2    Mouse
2      Pig
dtype: object

In [64]:
# .loc on a DataFrame returns the row with the given index label as a Series
# whose index is the column names.
car_sales.loc[3]

Make                    BMW
Colour                Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object

In [65]:
# .iloc selects by integer *position* (counting from 0) and ignores the index
# labels: position 4 is the fifth element.
animals.iloc[4]

'Pig'

In [66]:
# Slicing with .iloc is positional and excludes the end position:
# [:4] returns the elements at positions 0, 1, 2 and 3.
animals.iloc[:4]

0       Cat
8       Dog
2     Mouse
3    Monkey
dtype: object

In [None]:
# Strip the currency formatting characters ("$", "," and ".") from Price.
# The pattern is a raw string so the backslashes reach the regex engine as
# written; in a normal string literal "\$", "\," and "\." are invalid escape
# sequences and raise a SyntaxWarning on Python 3.12+ (DeprecationWarning before).
# NOTE: removing "." also removes the decimal point, so "$4,000.00" becomes
# "400000", and the column still holds strings - convert and rescale the
# values before doing arithmetic on them.
car_sales["Price"] = car_sales["Price"].str.replace(r'[\$\,\.]', '', regex=True)
