In [1]:
import pandas as pd

In [2]:
# Show the installed pandas version; the outputs recorded in this notebook come from this release
pd.__version__

'1.4.2'

## pd.Series(data,index,dtype)

In [3]:
# Build a Series from a plain list; pandas assigns the default 0..n-1 index
pd.Series(data=[10, 20, 30, 40, 50])

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [4]:
# Replace the default positional index with custom labels via `index`
pd.Series(
    data=[10, 20, 30, 40, 50],
    index=[101, 102, 103, 104, 105],
)

101    10
102    20
103    30
104    40
105    50
dtype: int64

In [5]:
# Keep the custom-indexed Series in `x` for the cells that follow
x = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=[101, 102, 103, 104, 105],
)
# A single integer inside [] is matched against the index labels: label 103 -> 30
x[103]

30

In [6]:
# Integer slices passed to [] are positional, not label-based: positions 102 and 103
# do not exist in a 5-element Series, so the result is empty.
# Use x.loc[102:104] to slice by the custom labels (with .loc the stop label is included).
x[102:104]

Series([], dtype: int64)

In [7]:
# Display the whole Series again to compare labels (left) with positions
x

101    10
102    20
103    30
104    40
105    50
dtype: int64

In [8]:
# An integer slice in [] selects by position with an exclusive stop: positions 1 and 2 (labels 102, 103)
x[1:3]

102    20
103    30
dtype: int64

In [9]:
# list() splits a string into its individual characters; used next as index labels
list('abcde')

['a', 'b', 'c', 'd', 'e']

In [10]:
# Same values as before, now labelled with the characters 'a'..'e'
y = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=['a', 'b', 'c', 'd', 'e'],
)
y

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [11]:
# Look up a single value by its string label
y['a']

10

In [12]:
# Label-based slice with a step: start at label 'a', run to the end, keep every 2nd element
y['a'::2]

a    10
c    30
e    50
dtype: int64

In [13]:
# Label-based slice: the stop label 'd' is included in the result,
# and custom string labels can be used directly as slice bounds
y['b':'d']

b    20
c    30
d    40
dtype: int64

### Check Properties of a series

In [14]:
# Show the Series whose properties are inspected in the next cells
x

101    10
102    20
103    30
104    40
105    50
dtype: int64

In [15]:
# Number of dimensions; a Series is one-dimensional
x.ndim

1

In [16]:
# Confirm the class of the object
type(x)

pandas.core.series.Series

In [17]:
# .index returns only the labels of the Series, without the values
x.index

Int64Index([101, 102, 103, 104, 105], dtype='int64')

In [18]:
# String labels are held in a generic Index with dtype 'object'
y.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [19]:
# .values returns only the data, as a NumPy array without the labels
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [20]:
# .size is the total number of elements in the Series
x.size

5

In [21]:
# .shape is a one-element tuple for a Series: (number of elements,)
x.shape

(5,)

In [22]:
# .dtype is the data type shared by all stored values
x.dtype

dtype('int64')

#### 2. pd.DataFrame structure:
 - pd.DataFrame(
   - data=None,
   - index: 'Axes | None' = None,
   - columns: 'Axes | None' = None,
   - dtype: 'Dtype | None' = None,
   - copy: 'bool | None' = None,
)

In [23]:
# A DataFrame is two-dimensional: it has rows and columns

In [24]:
# Calling the constructor with no arguments creates an empty DataFrame;
# print() shows its plain-text representation
print(pd.DataFrame())

Empty DataFrame
Columns: []
Index: []


In [25]:
# An empty DataFrame has no columns, so .dtypes is an empty Series
pd.DataFrame().dtypes

Series([], dtype: object)

In [26]:
# A flat list passed to DataFrame becomes a single column labelled 0
k = [12, 34, 56, 67]
pd.DataFrame(data=k)

Unnamed: 0,0
0,12
1,34
2,56
3,67


In [27]:
# The same list as a Series for comparison: one-dimensional, with no column header
pd.Series(k)

0    12
1    34
2    56
3    67
dtype: int64

#### pd.DataFrame(data,index,columns,dtype)

In [28]:
# Only `data` is supplied, so the row and column labels fall back to the defaults (0, 1, 2, ...)
pd.DataFrame([1,2,3,4])

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [29]:
# Implicit type casting: the single float (2.) causes every value in the column to be stored as a float
pd.DataFrame([1,2.,3,4])

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
3,4.0


In [30]:
# Numeric-looking strings are kept as strings, not converted to numbers
a = pd.DataFrame(data=['1', '2', '3'])
a

Unnamed: 0,0
0,1
1,2
2,3


In [31]:
# .dtypes lists the dtype of every column; the string column is reported as 'object'
a.dtypes

0    object
dtype: object

In [32]:
# Explicit conversion: astype returns a converted copy, parsing the strings as integers
# NOTE(review): 'int' resolves to the platform's default integer width; the recorded dtype below is
# int32, which suggests a Windows kernel. Pass 'int64' explicitly if a fixed width matters.
a2 = a.astype('int')
a2

Unnamed: 0,0
0,1
1,2
2,3


In [33]:
# Check the dtype produced by astype('int')
a2.dtypes

0    int32
dtype: object

In [34]:
# Request an explicit fixed-width integer type (16-bit) instead of the platform default
a.astype('int16')

Unnamed: 0,0
0,1
1,2
2,3


In [35]:
# The same strings can also be converted to a floating-point type (here 16-bit)
a.astype('float16')

Unnamed: 0,0
0,1.0
1,2.0
2,3.0


#### The biggest advantage of a DataFrame is that each column can hold a different data type

In [36]:
# Column-oriented data: one key per column, one list of values per key.
# 'salary' mixes a float with ints.
d = {
    'name': ['A', 'B', 'C', 'D'],
    'age': [23, 45, 67, 44],
    'salary': [24000., 45000, 56000, 78000],
}
d

{'name': ['A', 'B', 'C', 'D'],
 'age': [23, 45, 67, 44],
 'salary': [24000.0, 45000, 56000, 78000]}

In [37]:
# Build the DataFrame from the dict: keys become column labels, lists become column values
h = pd.DataFrame(data=d)
h

Unnamed: 0,name,age,salary
0,A,23,24000.0
1,B,45,45000.0
2,C,67,56000.0
3,D,44,78000.0


In [38]:
# Each column keeps its own dtype: object for name, int64 for age, float64 for salary
h.dtypes

name       object
age         int64
salary    float64
dtype: object

### We can create a DataFrame from a:
 - list
 - Series
 - numpy
 - range
 - str (only inside a container such as a list; a bare string passed as `data` raises `ValueError`)
 - dict

In [39]:
# From a flat list: one column, one row per element
pd.DataFrame([10,20,30])

Unnamed: 0,0
0,10
1,20
2,30


In [40]:
# Nested list: every inner list becomes one row of the frame
pd.DataFrame(data=[
    [10, 20, 30],
    [12, 35, 46],
])

Unnamed: 0,0,1,2
0,10,20,30
1,12,35,46


In [41]:
# A single Series becomes a one-column DataFrame
pd.DataFrame(data=pd.Series([101, 102, 103, 104]))

Unnamed: 0,0
0,101
1,102
2,103
3,104


In [42]:
# Three separate but identical Series collected in a list
data = [pd.Series([101, 102, 103, 104]) for _ in range(3)]
data

[0    101
 1    102
 2    103
 3    104
 dtype: int64,
 0    101
 1    102
 2    103
 3    104
 dtype: int64,
 0    101
 1    102
 2    103
 3    104
 dtype: int64]

In [43]:
# Each Series in the list becomes one row; the Series index (0..3) supplies the column labels
pd.DataFrame(data)

Unnamed: 0,0,1,2,3
0,101,102,103,104
1,101,102,103,104
2,101,102,103,104


In [44]:
# using numpy
import numpy as np

# Seed the generator so that Restart & Run All reproduces the same matrix;
# the previous unseeded np.random.random call produced different values on every run.
# Re-run this cell and the cells that use `data2` to refresh their displayed outputs.
rng = np.random.default_rng(42)
data2 = rng.random((4, 4))  # 4x4 floats drawn uniformly from [0, 1)
data2

array([[0.59874291, 0.54879583, 0.37591268, 0.19180355],
       [0.81847175, 0.67889735, 0.55816159, 0.26692212],
       [0.81968536, 0.63336882, 0.83809309, 0.46250072],
       [0.7605743 , 0.85050105, 0.32636451, 0.24335082]])

In [45]:
# From a 2-D NumPy array: array rows and columns map directly onto the frame, with default 0..3 labels
pd.DataFrame(data2)

Unnamed: 0,0,1,2,3
0,0.598743,0.548796,0.375913,0.191804
1,0.818472,0.678897,0.558162,0.266922
2,0.819685,0.633369,0.838093,0.462501
3,0.760574,0.850501,0.326365,0.243351


In [46]:
# Override the default 0..3 column labels with descriptive names
pd.DataFrame(
    data=data2,
    columns=['val1', 'val2', 'val3', 'val4'],
)

Unnamed: 0,val1,val2,val3,val4
0,0.598743,0.548796,0.375913,0.191804
1,0.818472,0.678897,0.558162,0.266922
2,0.819685,0.633369,0.838093,0.462501
3,0.760574,0.850501,0.326365,0.243351


In [47]:
# Using a dict: show the dict defined earlier before turning it into a DataFrame
d

{'name': ['A', 'B', 'C', 'D'],
 'age': [23, 45, 67, 44],
 'salary': [24000.0, 45000, 56000, 78000]}

In [48]:
# Dict keys become the column labels; each list becomes that column's values
pd.DataFrame(d)

Unnamed: 0,name,age,salary
0,A,23,24000.0
1,B,45,45000.0
2,C,67,56000.0
3,D,44,78000.0


In [49]:
# Plain dict lookup: returns the original Python list, not a pandas object
d['name']

['A', 'B', 'C', 'D']

In [50]:
# The list in the dict still mixes a float with ints; only the DataFrame column was stored as float64
d['salary']

[24000.0, 45000, 56000, 78000]