# Series DataFrame Panel

In [4]:
import pandas as pd
import numpy as np

### Series


##### Series from list and numpy

In [402]:
data = [15,12.5,'Ksb',22,"Jim"]

In [403]:
ps = pd.Series(data)

In [404]:
ndata = np.array(data)

In [405]:
pd.Series(ndata)

0      15
1    12.5
2     Ksb
3      22
4     Jim
dtype: object

##### Series กำหนด index

In [406]:
data = [1,2,3,4,5,6]
idx = ['a','b','c','d','e','f']

In [407]:
pd.Series(data , index=idx)

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [408]:
pd.Series(np.array(data) , index = idx)

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int32

### DataFrame

In [409]:
data = [1,2,3,4,5]
colmns = ['Number']
pd.DataFrame(data)

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [410]:
pd.DataFrame(data , columns= colmns)


Unnamed: 0,Number
0,1
1,2
2,3
3,4
4,5


In [411]:
list01 = [1,2,3,4,5]
list02 = ['a','b','c','d','e']
colmns = ['Number','Alphabet']
data = list(zip(list01,list02))

In [412]:
pd.DataFrame(data , columns= colmns)

Unnamed: 0,Number,Alphabet
0,1,a
1,2,b
2,3,c
3,4,d
4,5,e


In [413]:
datas = [{'Name':'Somkiat' , 'Age': 20 , 'Score':50},
         {'Name':'Dumrong','Age':30,'Score':69}]

In [414]:
pd.DataFrame(datas ,index= [x for x in [1,2]])

Unnamed: 0,Name,Age,Score
1,Somkiat,20,50
2,Dumrong,30,69


##### การสร้าง data frame จาก matrix

In [415]:
data = [[1,2,3,4],
        [5,6,7,8],
        [7,8,9,10]]
colmns = ['a','b','c','d']
index = ['First','Second','Third']

In [416]:
pd.DataFrame(data , columns= colmns , index= index)

Unnamed: 0,a,b,c,d
First,1,2,3,4
Second,5,6,7,8
Third,7,8,9,10


#### การแทนที่ index

In [417]:
data = [['Wasan',18,177],['Prayuth',80,101],['Gojo',20,200]]
cols = ['Name','Age','Height']

In [418]:
df = pd.DataFrame(data, columns= cols)

In [419]:
df.set_index(['Name'] , inplace=True)

In [420]:
df

Unnamed: 0_level_0,Age,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Wasan,18,177
Prayuth,80,101
Gojo,20,200


#### การสร้าง DataFrame จาก Series

Series -> Dict -> DataFrame

In [421]:
A = pd.Series([1,2,3,4,5,6])
B = pd.Series(['a','b','c','d','c','d'])

In [422]:
frame = {'Number':A,'Alphabet':B}

In [423]:
pd.DataFrame(frame, index=[1,2,3,4,5])

Unnamed: 0,Number,Alphabet
1,2,b
2,3,c
3,4,d
4,5,c
5,6,d


#### Saving as CSV

.to_csv(filepath , index =True or False , header = True or False)

In [424]:
C = pd.DataFrame(frame)

In [425]:
filepath = 'mydata.csv'
C.to_csv(filepath, index = False)

In [426]:
pd.read_csv(filepath , index_col= 'Number')

Unnamed: 0_level_0,Alphabet
Number,Unnamed: 1_level_1
1,a
2,b
3,c
4,d
5,c
6,d


In [427]:
pd.read_csv('mydata.csv'  , index_col= 'Number')

Unnamed: 0_level_0,Alphabet
Number,Unnamed: 1_level_1
1,a
2,b
3,c
4,d
5,c
6,d


In [428]:
cols = ['Alphabet']
filepath = 'data02.csv'
C.to_csv(filepath , columns= cols , index= False)
pd.read_csv('data02.csv' , index_col= 'Alphabet')

a
b
c
d
c
d


In [429]:
name = ['Somkiat','Decha','Dumrong','Winai','Bancha','Wiroj','Nicha']
age = [28,32,54,21,19,24,29]
score = [98,70,56,65,82,95,76]
data = {'Name':name , 'Age':age , 'Score':score}

In [430]:
df = pd.DataFrame(data , index= [x+1 for x in range(len(name))])

In [431]:
cols = ['Name','Score']
df[cols]

Unnamed: 0,Name,Score
1,Somkiat,98
2,Decha,70
3,Dumrong,56
4,Winai,65
5,Bancha,82
6,Wiroj,95
7,Nicha,76


In [432]:
df[df['Score'] > 90]

Unnamed: 0,Name,Age,Score
1,Somkiat,28,98
6,Wiroj,24,95


In [433]:
filepath = 'data_exam_02.csv'
df.to_csv(filepath , index= False)

In [434]:
pd.read_csv(filepath , index_col= 'Name')

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Somkiat,28,98
Decha,32,70
Dumrong,54,56
Winai,21,65
Bancha,19,82
Wiroj,24,95
Nicha,29,76


In [435]:
cols = [0]

In [436]:
pd.read_csv(filepath , usecols= cols, index_col= [0])

Somkiat
Decha
Dumrong
Winai
Bancha
Wiroj
Nicha


## ตรวจสอบข้อมูลเบื้องต้น

In [437]:
data = pd.read_csv('data_exam_02.csv')

In [438]:
data.head(5)

Unnamed: 0,Name,Age,Score
0,Somkiat,28,98
1,Decha,32,70
2,Dumrong,54,56
3,Winai,21,65
4,Bancha,19,82


In [439]:
data.tail(3)

Unnamed: 0,Name,Age,Score
4,Bancha,19,82
5,Wiroj,24,95
6,Nicha,29,76


In [440]:
data.sample(3)

Unnamed: 0,Name,Age,Score
4,Bancha,19,82
6,Nicha,29,76
1,Decha,32,70


In [441]:
data.Score.describe()

count     7.000000
mean     77.428571
std      15.404081
min      56.000000
25%      67.500000
50%      76.000000
75%      88.500000
max      98.000000
Name: Score, dtype: float64

In [442]:
data.Score.sum()

542

#### การกำหนดชนิดข้อมูล

In [443]:
data.Score = data.Score.astype('category')
data.dtypes

Name       object
Age         int64
Score    category
dtype: object

#### กำหนดเงื่อนไข

In [444]:
data.Score = data.Score.astype('int')

In [445]:
data[(data.Score >= 30) & (data.Age >= 20)]

Unnamed: 0,Name,Age,Score
0,Somkiat,28,98
1,Decha,32,70
2,Dumrong,54,56
3,Winai,21,65
5,Wiroj,24,95
6,Nicha,29,76


In [446]:
data.loc[(data.Score>=30) | (data.Age >= 20)] # or

Unnamed: 0,Name,Age,Score
0,Somkiat,28,98
1,Decha,32,70
2,Dumrong,54,56
3,Winai,21,65
4,Bancha,19,82
5,Wiroj,24,95
6,Nicha,29,76


In [447]:
data[data.Score.isin([56,65,70])]

Unnamed: 0,Name,Age,Score
1,Decha,32,70
2,Dumrong,54,56
3,Winai,21,65


#### การ sort

In [448]:
data.sort_values('Score') # มากไปน้อย

Unnamed: 0,Name,Age,Score
2,Dumrong,54,56
3,Winai,21,65
1,Decha,32,70
6,Nicha,29,76
4,Bancha,19,82
5,Wiroj,24,95
0,Somkiat,28,98


In [449]:
data.sort_values('Score' , ascending= False , inplace = False )

Unnamed: 0,Name,Age,Score
0,Somkiat,28,98
5,Wiroj,24,95
4,Bancha,19,82
6,Nicha,29,76
1,Decha,32,70
3,Winai,21,65
2,Dumrong,54,56


#### Update ข้อมูล

In [450]:
data.head(4)

Unnamed: 0,Name,Age,Score
0,Somkiat,28,98
1,Decha,32,70
2,Dumrong,54,56
3,Winai,21,65


In [451]:
data.at[2,'Age'] = 99 # ใช้ cols = str

In [452]:
data.iat[2,1] = 54

In [453]:
data.Name.replace('Decha' , 'Dechar' , inplace=True)

In [454]:
data.loc[data.Name.str.match('Somkiat') , 'Score'] = 100
data.head(3)

Unnamed: 0,Name,Age,Score
0,Somkiat,28,100
1,Dechar,32,70
2,Dumrong,54,56


In [455]:
data.loc[0, 'Name'] = 'Wichai'

#### การเพิ่ม-ลด column 

In [456]:
data['Height'] = [160,177,158,178,165,176,177]

In [457]:
data.replace('Winai' , 'Vinai')

Unnamed: 0,Name,Age,Score,Height
0,Wichai,28,100,160
1,Dechar,32,70,177
2,Dumrong,54,56,158
3,Vinai,21,65,178
4,Bancha,19,82,165
5,Wiroj,24,95,176
6,Nicha,29,76,177


In [460]:
data['Status'] = 'pass'
data

Unnamed: 0,Name,Age,Score,Height,Status
0,Wichai,28,100,160,pass
1,Dechar,32,70,177,pass
2,Dumrong,54,56,158,pass
3,Winai,21,65,178,pass
4,Bancha,19,82,165,pass
5,Wiroj,24,95,176,pass
6,Nicha,29,76,177,pass


In [465]:
data['Status'][data['Score'] < 80] = 'Not Pass'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Status'][data['Score'] < 80] = 'Not Pass'


In [466]:
data

Unnamed: 0,Name,Age,Score,Height,Status
0,Wichai,28,100,160,pass
1,Dechar,32,70,177,Not Pass
2,Dumrong,54,56,158,Not Pass
3,Winai,21,65,178,Not Pass
4,Bancha,19,82,165,pass
5,Wiroj,24,95,176,pass
6,Nicha,29,76,177,Not Pass


In [467]:
data[data['Status'] == 'pass']

Unnamed: 0,Name,Age,Score,Height,Status
0,Wichai,28,100,160,pass
4,Bancha,19,82,165,pass
5,Wiroj,24,95,176,pass


#### การเพิ่ม-ลด row

In [16]:
datas = [['Wasan' ,18,60,177] , ['KGB',20,72,165]]
cols = ['Name','Age','Weight','Height']

In [17]:
df = pd.DataFrame(datas , columns= cols )
df.set_index('Name' , inplace = True)

In [18]:
df

Unnamed: 0_level_0,Age,Weight,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Wasan,18,60,177
KGB,20,72,165


In [25]:
newdata = [['Somchai' , 23 ,89,180] ,['Darinee' , 22,78,165]]
newdf = pd.DataFrame(newdata, columns=cols)

In [26]:
newdf.set_index('Name' , inplace=True)
newdf

Unnamed: 0_level_0,Age,Weight,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Somchai,23,89,180
Darinee,22,78,165


In [37]:
df.append(newdf)

  df.append(newdf)


Unnamed: 0_level_0,Age,Weight,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Wasan,18,60,177
KGB,20,72,165
Somchai,23,89,180
Darinee,22,78,165


In [109]:
df2 = df.append(newdf)

  df2 = df.append(newdf)


In [51]:
delete_row_name = 'KGB'

In [40]:
df2.drop(delete_row_name , axis = 0)

Unnamed: 0_level_0,Age,Weight,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Wasan,18,60,177
Somchai,23,89,180
Darinee,22,78,165


In [86]:
df2['Section'][df2['Weight'] >75] = 1

In [87]:
df2

Unnamed: 0_level_0,Age,Weight,Height,Section
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Wasan,18,60,177,0
KGB,20,72,165,0
Somchai,23,89,180,1
Darinee,22,78,165,1


#### การ sum row column และ groupby

In [95]:
df2

Unnamed: 0_level_0,Age,Weight,Height,Section
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Wasan,18,60,177,0
KGB,20,72,165,0
Somchai,23,89,180,1
Darinee,22,78,165,1


In [96]:
cols = ['Weight' , 'Height' , 'Section']

In [98]:
df2[cols].sum(axis=0)

Weight     299
Height     687
Section      2
dtype: int64

In [97]:
df2[cols].groupby(['Section']).sum()

Unnamed: 0_level_0,Weight,Height
Section,Unnamed: 1_level_1,Unnamed: 2_level_1
0,132,342
1,167,345


#### การแบ่งกลุ่ม cut

In [189]:
group = [15,20,30]
labels = ['Teen','Adult']

In [190]:
df2['Stage'] = pd.cut(df2['Age'] , bins=group , labels=labels)

In [191]:
df2

Unnamed: 0_level_0,Age,Weight,Height,Stage
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Wasan,18,60,177,Teen
KGB,20,72,165,Teen
Somchai,23,89,180,Adult
Darinee,22,78,165,Adult


In [194]:
df2.groupby('Stage').count()

Unnamed: 0_level_0,Age,Weight,Height
Stage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Teen,2,2,2
Adult,2,2,2
