# Pandas Series

In [1]:
import numpy as np
import pandas as pd

In [22]:
# Two small Series to compare.
series_a = pd.Series([1, 2, 3, 4])
series_b = pd.Series([5, 2, 3, 6])

# Plain-list copies, kept under the original names `a` and `b`.
a = series_a.tolist()
b = series_b.tolist()

# Values of the first Series that also occur in the second, in their original
# order. isin() performs the membership test in one vectorised call instead of
# scanning a Python list once per element. The index is reset so the result is
# labelled 0..n-1, exactly like a Series built from a fresh list.
c = series_a[series_a.isin(series_b)].reset_index(drop=True)

print(c)

0    2
1    3
dtype: int64


In [2]:
# Index labels and the values they will be paired with, position by position.
indexes = ['a', 'b', 'c']
my_data = [10, 20, 30]

In [3]:
# Build a Series from the values, labelled by `indexes` (keyword form).
s = pd.Series(data=my_data, index=indexes)

# Equivalent positional form: data first, index second. It rebinds `s` to an
# equal Series, so either line on its own is enough.

s = pd.Series(my_data, indexes)

In [4]:
# Display the Series: index labels on the left, values on the right.
s

a    10
b    20
c    30
dtype: int64

In [5]:
# Look up a single value by its index label.
s['a']

10

In [6]:
# The same kind of label lookup, written to stdout with print().
print(s['b'])

20


In [7]:
# Confirm the object's type.
type(s)

pandas.core.series.Series

# Pandas DataFrames

In [8]:
# Row labels, column names and row-by-row values for a 2 x 5 DataFrame.
# Note: `indexes` and `my_data` are rebound here; they previously held the
# labels and values used for the Series.
indexes = ['A', 'B']
columns = ['first', 'second', 'third', 'fourth', 'fifth']
my_data = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]

In [9]:
# Each inner list of my_data becomes one row; `index` labels the rows and
# `columns` names the columns.
df = pd.DataFrame(my_data, index=indexes, columns=columns)

In [10]:
# Display the whole frame (it has only two rows).
df

Unnamed: 0,first,second,third,fourth,fifth
A,1,2,3,4,5
B,6,7,8,9,10


In [11]:
# Confirm the object's type.
type(df)

pandas.core.frame.DataFrame

In [12]:
# Selecting one column by name returns it as a Series indexed by the row labels.
df['first']

A    1
B    6
Name: first, dtype: int64

In [13]:
# Read one cell by row label and column name in a single .loc call, instead of
# the chained form df['first']['A'], which first builds the whole column Series
# and then indexes into it.
df.loc['A', 'first']

1

In [14]:
# A single selected column is a Series.
type(df['first'])

pandas.core.series.Series

In [15]:
# A list of column names selects several columns and returns a DataFrame.
df[['first', 'second']]

Unnamed: 0,first,second
A,1,2
B,6,7


In [16]:
# Add a column 'new' holding the row-wise sum of 'first' and 'second'.
# The assignment modifies df in place.

df['new'] = df['first'] + df['second']

In [17]:
# Display df again: it now has the extra 'new' column.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [18]:
# drop() returns a new frame without the column; df itself is not modified.
# Only the result of the cell's last expression is displayed.
df.drop(columns='new')

# Equivalent spelling: pass the label and axis=1 (the column axis).

df.drop('new', axis=1)

Unnamed: 0,first,second,third,fourth,fifth
A,1,2,3,4,5
B,6,7,8,9,10


In [19]:
# shape is (rows, columns): axis 0 has 2 rows, axis 1 has 6 columns.
df.shape

(2, 6)

In [20]:
# Label-based selection: the row labelled 'A', returned as a Series holding
# that row's value for every column.
df.loc['A']

first     1
second    2
third     3
fourth    4
fifth     5
new       3
Name: A, dtype: int64

In [21]:
# Position-based selection: a full slice selects every row (with all columns).
df.iloc[:]

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [22]:
# Row slice first, column slice second: here both are full, so everything is selected.
df.iloc[:, :]

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [23]:
# Lists of integer positions: rows 0 and 1, columns 0 and 2 (first and third).
df.iloc[[0,1], [0,2]]

Unnamed: 0,first,third
A,1,3
B,6,8


In [24]:
# Slice stops are exclusive: row 0 only, columns 0, 1 and 2.
df.iloc[0:1, 0:3]

Unnamed: 0,first,second,third
A,1,2,3


In [25]:
# All rows, first column only. Because a slice is used, the result is a
# one-column DataFrame rather than a Series.
df.iloc[:, :1]

Unnamed: 0,first
A,1
B,6


In [26]:
# All rows, every column except the last one ('new').
df.iloc[:, :-1]

Unnamed: 0,first,second,third,fourth,fifth
A,1,2,3,4,5
B,6,7,8,9,10


In [27]:
# Two integer positions select a single value: second row, third column.
df.iloc[1, 2]

8

In [28]:
# Callable indexer: iloc calls the lambda with df and uses the boolean array
# it returns as a row mask, keeping the rows whose label is not "A".
df.iloc[lambda x: x.index != "A"]

Unnamed: 0,first,second,third,fourth,fifth,new
B,6,7,8,9,10,13


In [29]:
# Element-wise comparison: produces a boolean frame with the same shape as df.
df > 5

Unnamed: 0,first,second,third,fourth,fifth,new
A,False,False,False,False,False,False
B,True,True,True,True,True,True


In [30]:
# Masking with a boolean frame keeps the values > 5 and replaces every other
# cell with NaN, which is why the remaining numbers are displayed as floats.
df[df > 5]

Unnamed: 0,first,second,third,fourth,fifth,new
A,,,,,,
B,6.0,7.0,8.0,9.0,10.0,13.0


In [31]:
# Show df again for reference: the masking expression did not modify it.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [32]:
# Row filter: keep only the rows whose 'second' value is < 5 (all columns shown).
df[df['second'] < 5]

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3


In [33]:
# Row filter: keep only the rows whose 'second' value is > 5 (all columns shown).
df[df['second'] > 5]

Unnamed: 0,first,second,third,fourth,fifth,new
B,6,7,8,9,10,13


In [34]:
# Rows where 'second' > 5, restricted to the 'second' column. A single .loc
# call takes the row mask and the column together, rather than chaining two
# [] lookups (filter first, then select from the intermediate frame).
df.loc[df['second'] > 5, 'second']

B    7
Name: second, dtype: int64

In [35]:
# Rows where 'second' > 5, restricted to the 'second' and 'third' columns.
# A single .loc call takes the row mask and the column list together, rather
# than chaining two [] lookups.
df.loc[df['second'] > 5, ['second', 'third']]

Unnamed: 0,second,third
B,7,8


In [36]:
# Show df again for reference before combining conditions.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [37]:
# Combine boolean masks with & (element-wise AND). Each comparison needs its
# own parentheses because & binds more tightly than >.
df[(df['second'] > 5) & (df['third'] > 5)]

Unnamed: 0,first,second,third,fourth,fifth,new
B,6,7,8,9,10,13


In [38]:
# Combine boolean masks with | (element-wise OR). Each comparison needs its
# own parentheses because | binds more tightly than > and <.
df[(df['second'] > 5) | (df['third'] < 5)]

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [39]:
# Show df again for reference before resetting the index.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [40]:
# reset_index() returns a new frame with a default 0..n-1 index; the old row
# labels are moved into a column named 'index'.
df.reset_index()

Unnamed: 0,index,first,second,third,fourth,fifth,new
0,A,1,2,3,4,5,3
1,B,6,7,8,9,10,13


In [41]:
# df itself is unchanged: the reset_index() result was not assigned back.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [42]:
 df.reset_index(drop=True)

Unnamed: 0,first,second,third,fourth,fifth,new
0,1,2,3,4,5,3
1,6,7,8,9,10,13


In [43]:
# df is still unchanged: it keeps its 'A'/'B' row labels.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [44]:
# The row labels are stored in an Index object.
df.index

Index(['A', 'B'], dtype='object')

In [45]:
# Returns a new frame that uses the values of the 'new' column as the row
# index; 'new' is no longer an ordinary column in the result.
df.set_index('new')

Unnamed: 0_level_0,first,second,third,fourth,fifth
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,1,2,3,4,5
13,6,7,8,9,10


In [46]:
# df is unchanged: the set_index() result was not assigned back.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13


In [52]:
# Clear the name of the row index (mutates df's index in place).
# NOTE(review): the execution counter jumps from 46 to 52 and df.index has no
# name at this point, so this looks like a leftover from cells that were
# removed -- confirm whether it is still needed.
df.index.names = [None]

In [53]:
# Display df after clearing the index name.
df

Unnamed: 0,first,second,third,fourth,fifth,new
A,1,2,3,4,5,3
B,6,7,8,9,10,13
