# Pandas

In [2]:
import os

import numpy as np
import pandas as pd

In [3]:
# Record the library versions this notebook was executed with, one per line.
print(np.__version__, pd.__version__, sep="\n")

2.0.2
2.3.3


In [4]:
# Base directory under which the notebook looks for its data files.
# The AI_DOCUMENT_ROOT environment variable overrides the location so the
# notebook is not tied to a single machine; when it is unset (or empty) the
# original path is used, so existing setups keep working unchanged.
root_dir = os.environ.get("AI_DOCUMENT_ROOT") or '/Volumes/data/documents/ai_document'

## Series

### Dictionary

In [90]:
# Build a Series from a mapping: the keys become the index labels and the
# values become the data.
data = dict(a=10, b=20, c=30, d=40, e=50)
ds = pd.Series(data=data)

print("data:\n", ds, "\n")

data:
 a    10
b    20
c    30
d    40
e    50
dtype: int64 



### List

In [89]:
# Build a Series from a plain list; no index is given, so pandas numbers the
# entries 0..n-1.
data = list(range(10, 60, 10))
ds = pd.Series(data=data)

print("data:\n", ds, "\n")

data:
 0    10
1    20
2    30
3    40
4    50
dtype: int64 



In [91]:
# Same list as before, but this time the index labels are supplied explicitly.
data = list(range(10, 60, 10))
ds = pd.Series(data=data, index=list("abcde"))

print("data:\n", ds, "\n")

data:
 a    10
b    20
c    30
d    40
e    50
dtype: int64 



### Items

In [95]:
# Labelled Series used by the item-access examples that follow.
data = dict(zip("abcde", (10, 20, 30, 40, 50)))
ds = pd.Series(data=data)

In [96]:
# A Series has two parts: the index (labels) and the underlying value array.
print(
    "Index:\n",
    ds.index,
    "\n",
)
print(
    "Value:\n",
    ds.values,
    "\n",
)

Index:
 Index(['a', 'b', 'c', 'd', 'e'], dtype='object') 

Value:
 [10 20 30 40 50] 



In [97]:
# Two equivalent ways to read the element labelled "a":
# bracket lookup by label, and attribute-style access.
v1, v2 = ds["a"], ds.a

print("v1:\n{}".format(v1))
print("\nv2:\n{}".format(v2))

v1:
10

v2:
10


In [86]:
# Selecting several elements from the Series.
# Plain `[]` treats an integer slice as positional and a string slice as
# label-based, which is easy to misread; the explicit indexers below state
# the intent and produce the same results.
v1 = ds[['a', 'b']]                           # list of labels
v2 = ds.iloc[1:4]                             # positions 1..3 (end exclusive)
v3 = ds.loc["b":"d"]                          # labels "b".."d" (end inclusive)
v4 = ds[[True, False, False, False, False]]   # boolean mask, one flag per element

print("v1:\n{}".format(v1))
print("\nv2:\n{}".format(v2))
print("\nv3:\n{}".format(v3))
print("\nv4:\n{}".format(v4))

v1:
a    10
b    20
dtype: int64

v2:
b    20
c    30
d    40
dtype: int64

v3:
b    20
c    30
d    40
dtype: int64

v4:
a    10
dtype: int64


In [87]:
# Scalar aggregates of the Series.
v1, v2, v3 = ds.min(), ds.max(), ds.mean()
# Element-wise comparison against the mean yields boolean Series.
v4 = ds.gt(v3)
v5 = ds.lt(v3)

print("v1:\n{}".format(v1))
print("\nv2:\n{}".format(v2))
print("\nv3:\n{}".format(v3))
print("\nv4:\n{}".format(v4))
print("\nv5:\n{}".format(v5))

v1:
10

v2:
50

v3:
30.0

v4:
a    False
b    False
c    False
d     True
e     True
dtype: bool

v5:
a     True
b     True
c    False
d    False
e    False
dtype: bool


## DataFrame

In [148]:
# Column-oriented input: each key is a column name, each list holds that
# column's values (one entry per row).
data = dict(
    name=['Ali', 'Sara', 'Morteza'],
    age=[30, 20, 40],
    city=['Mashhad', 'Tehran', 'London'],
)
df = pd.DataFrame(data=data)

### Part-1

In [150]:
# The frame itself, followed by its structural attributes.
print("Data:\n", df, '\n')
print("columns:\n", df.columns, "\n")
# The row index is shown twice: as the index object and as a plain list.
print("index:\n", df.index, "\n")
print("index:\n", df.index.tolist(), "\n")
print("values:\n", df.values, "\n")
print("Shape: {}".format(df.shape))

Data:
       name  age     city
0      Ali   30  Mashhad
1     Sara   20   Tehran
2  Morteza   40   London 

columns:
 Index(['name', 'age', 'city'], dtype='object') 

index:
 RangeIndex(start=0, stop=3, step=1) 

index:
 [0, 1, 2] 

values:
 [['Ali' 30 'Mashhad']
 ['Sara' 20 'Tehran']
 ['Morteza' 40 'London']] 

Shape: (3, 3)


In [151]:
# A single column can be selected by key or by attribute; both return the
# same Series.
print("Item:\n", df["name"], "\n")
print("Item:\n", df.name, "\n")
# Read one cell with a single .loc[row_label, column_label] lookup instead of
# chaining two [] operations (df["name"][0]); the value returned is the same.
print("Item:\n", df.loc[0, "name"], "\n")

Item:
 0        Ali
1       Sara
2    Morteza
Name: name, dtype: object 

Item:
 0        Ali
1       Sara
2    Morteza
Name: name, dtype: object 

Item:
 Ali 



### Part-2

In [143]:
# df.info() writes its summary directly to stdout, so only a label is printed
# here before calling it.
print("info:")
df.info()

info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   age     3 non-null      int64 
 2   city    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [144]:
# Summary statistics, restricted to the int64 columns.
print(
    "describe:\n",
    df.describe(include=["int64"]),
    "\n",
)

describe:
         age
count   3.0
mean   30.0
std    10.0
min    20.0
25%    25.0
50%    30.0
75%    35.0
max    40.0 



In [146]:
# First two rows, then last two rows.
print("head:\n", df.head(n=2), "\n")
print("tail:\n", df.tail(n=2), "\n")
# Boolean mask: True where the age is strictly greater than 30.
print("age:\n", df['age'].gt(30))

head:
    name  age     city
0   Ali   30  Mashhad
1  Sara   20   Tehran 

tail:
       name  age    city
1     Sara   20  Tehran
2  Morteza   40  London 

age:
 0    False
1    False
2     True
Name: age, dtype: bool


### Iloc and Loc

In [155]:
# Position-based selection with .iloc[rows, columns].
print(df, "\n")
# Single cell: first row, first column.
print(df.iloc[0, 0], "\n")
# Rows from position 1 onward, first two columns.
print(
    df.iloc[1:, :2],
    "\n",
)
# Rows at positions 0 and 1, last three columns.
print(
    df.iloc[[0, 1], -3:],
    "\n",
)

      name  age     city
0      Ali   30  Mashhad
1     Sara   20   Tehran
2  Morteza   40   London 

Ali 

      name  age
1     Sara   20
2  Morteza   40 

   name  age     city
0   Ali   30  Mashhad
1  Sara   20   Tehran 



In [156]:
# Label-based selection with .loc[rows, columns]; label slices include both
# endpoints.
# Single cell: row label 0, column "name".
print(df.loc[0, 'name'], "\n")
# Row labels 0 through 1, columns "name" through "city".
print(
    df.loc[0:1, "name":"city"],
    "\n",
)
# Explicit lists of row labels and column labels.
print(
    df.loc[[0, 1], ["name", "city"]],
    "\n",
)

Ali 

   name  age     city
0   Ali   30  Mashhad
1  Sara   20   Tehran 

   name     city
0   Ali  Mashhad
1  Sara   Tehran 



## CSV

### Read

In [25]:
# Load the CSV stored under root_dir. Fields are comma-separated and anything
# after a '#' on a line is treated as a comment and ignored by the parser.
dataset = pd.read_csv(
    f"{root_dir}/dataset/iris_2.csv",
    comment="#",
    sep=",",
)

### Display

In [22]:
# Preview the frame that was just loaded from CSV. The original cell printed
# `df` (the small name/age/city frame built earlier), which only showed this
# data because of leftover kernel state; `dataset` is the frame whose shape is
# reported below.
print(dataset.head(5), '\n')
rows, cols = dataset.shape
print("rows: {}, cols:{}".format(rows, cols))

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa 

rows: 150, cols:6


### X

In [23]:
# Feature matrix: every row, columns at positions 1 through 4.
x = dataset.iloc[
    :,    # all rows
    1:5,  # column positions 1..4 (end exclusive)
]
print(x.head(n=5))

   SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
0            5.1           3.5            1.4           0.2
1            4.9           3.0            1.4           0.2
2            4.7           3.2            1.3           0.2
3            4.6           3.1            1.5           0.2
4            5.0           3.6            1.4           0.2


### Y

In [24]:
# Target vector: every row of the column at position 5, returned as a Series.
y = dataset.iloc[
    :,  # all rows
    5,  # single column position
]
print(y.head(n=5))

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Species, dtype: object


### dd