In [5]:
'''
INTRODUCTION TO PANDAS 🐼
Python library for data analysis and manipulation

Two main data structures
1D - Series (list)
2D - DataFrame (matrix)

Why ?
- Support for CSV, Excel, SQL, JSON etc
- Powerful data aggregation and grouping
- Easy handling of missing
- Convenient for filtering, joining, and reshaping
'''

'\nINTRODUCTION TO PANDAS 🐼\nPython library for data analysis and manipulation\n\nTwo main data structures\n1D - Series (list)\n2D - DataFrame (matrix)\n\nWhy ?\n- Support for CSV, Excel, SQL, JSON etc\n- Powerful data aggregation and grouping\n- Easy handling of missing\n- Convenient for filtering, joining, and reshaping\n'

In [6]:
import numpy as np
import pandas as pd

In [18]:
friends = {
    "name": ["Nitin", "Shyam", "Pavan"],
    "city": ["Bangalore", "Delhi", "Vishakha"],
    "marks": [98, 99, 100]
}

In [19]:
# Create dataframe
df = pd.DataFrame(friends)

In [20]:
df

Unnamed: 0,name,city,marks
0,Nitin,Bangalore,98
1,Shyam,Delhi,99
2,Pavan,Vishakha,100


In [21]:
# Saving dataframe to files
df.to_csv('friends.csv')

In [22]:
# Saving dataframe to files without index
df.to_csv('friends.csv', index=False)

In [23]:
# top of dataframe
df.head(2)

Unnamed: 0,name,city,marks
0,Nitin,Bangalore,98
1,Shyam,Delhi,99


In [24]:
# bottom of dataframe
df.tail(2)

Unnamed: 0,name,city,marks
1,Shyam,Delhi,99
2,Pavan,Vishakha,100


In [25]:
df

Unnamed: 0,name,city,marks
0,Nitin,Bangalore,98
1,Shyam,Delhi,99
2,Pavan,Vishakha,100


In [27]:
df.describe()

Unnamed: 0,marks
count,3.0
mean,99.0
std,1.0
min,98.0
25%,98.5
50%,99.0
75%,99.5
max,100.0


In [28]:
friendsRead = pd.read_csv('friends-read.csv')

In [29]:
friendsRead

Unnamed: 0,name,city,marks
0,Nitin,Bangalore,98
1,Shyam,Delhi,99
2,Pavan,Vishakha,100
3,Anjali,Mumbai,95
4,Ravi,Chennai,88
5,Meena,Hyderabad,92
6,Aman,Pune,85
7,Sneha,Kolkata,93
8,Raj,Ahmedabad,90
9,Divya,Jaipur,97


In [30]:
friendsRead.describe()

Unnamed: 0,marks
count,10.0
mean,93.7
std,4.98999
min,85.0
25%,90.5
50%,94.0
75%,97.75
max,100.0


In [34]:
# matrix - [row][column]
# df - [col][row]
friendsRead['name']

0     Nitin
1     Shyam
2     Pavan
3    Anjali
4      Ravi
5     Meena
6      Aman
7     Sneha
8       Raj
9     Divya
Name: name, dtype: object

In [35]:
# specific values
friendsRead['name'][2]

'Pavan'

In [36]:
# specific values update
friendsRead['name'][2] = 'PAVAN'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  friendsRead['name'][2] = 'PAVAN'


In [37]:
friendsRead

Unnamed: 0,name,city,marks
0,Nitin,Bangalore,98
1,Shyam,Delhi,99
2,PAVAN,Vishakha,100
3,Anjali,Mumbai,95
4,Ravi,Chennai,88
5,Meena,Hyderabad,92
6,Aman,Pune,85
7,Sneha,Kolkata,93
8,Raj,Ahmedabad,90
9,Divya,Jaipur,97


In [39]:
friendsRead.to_csv('friends-read.csv')

In [40]:
friendsRead.index = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',]

In [41]:
friendsRead

Unnamed: 0,name,city,marks
a,Nitin,Bangalore,98
b,Shyam,Delhi,99
c,PAVAN,Vishakha,100
d,Anjali,Mumbai,95
e,Ravi,Chennai,88
f,Meena,Hyderabad,92
g,Aman,Pune,85
h,Sneha,Kolkata,93
i,Raj,Ahmedabad,90
j,Divya,Jaipur,97


In [42]:
sr = pd.Series([1,2,3,4,5])

In [43]:
sr

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [44]:
type(sr)

pandas.core.series.Series

In [45]:
type(friendsRead)

pandas.core.frame.DataFrame

In [48]:
sr1 = pd.Series(np.random.randint(1, 11, size=10))

In [49]:
sr1

0     2
1     5
2     5
3     2
4     8
5     1
6    10
7     2
8     3
9     2
dtype: int64

In [53]:
randomDf = pd.DataFrame(np.random.randint(1, 101, size=(5,5)), index=np.arange(5))

In [54]:
randomDf

Unnamed: 0,0,1,2,3,4
0,61,95,34,81,50
1,40,34,57,98,88
2,32,73,72,49,7
3,44,71,44,39,62
4,58,64,55,46,61


In [55]:
randomDf.describe()

Unnamed: 0,0,1,2,3,4
count,5.0,5.0,5.0,5.0,5.0
mean,47.0,67.4,52.4,62.6,53.6
std,12.247449,21.984085,14.328294,25.540164,29.55165
min,32.0,34.0,34.0,39.0,7.0
25%,40.0,64.0,44.0,46.0,50.0
50%,44.0,71.0,55.0,49.0,61.0
75%,58.0,73.0,57.0,81.0,62.0
max,61.0,95.0,72.0,98.0,88.0


In [56]:
randomDf1 = pd.DataFrame(np.random.randint(1, 101, size=(200,5)), index=np.arange(200))

In [57]:
randomDf1

Unnamed: 0,0,1,2,3,4
0,6,84,17,90,46
1,67,28,98,85,30
2,40,23,35,14,76
3,70,34,94,19,89
4,86,4,20,95,71
...,...,...,...,...,...
195,19,96,59,89,51
196,84,58,48,9,55
197,37,47,90,78,73
198,56,10,99,61,53


In [58]:
randomDf1.head(20)

Unnamed: 0,0,1,2,3,4
0,6,84,17,90,46
1,67,28,98,85,30
2,40,23,35,14,76
3,70,34,94,19,89
4,86,4,20,95,71
5,79,18,57,91,69
6,87,9,59,99,31
7,85,89,99,35,100
8,16,28,88,62,100
9,8,30,67,25,16


In [59]:
randomDf1.tail(20)

Unnamed: 0,0,1,2,3,4
180,58,76,38,74,44
181,42,35,75,43,41
182,46,61,91,87,47
183,53,26,11,40,67
184,78,25,7,77,99
185,61,96,80,94,70
186,3,35,47,55,12
187,61,79,39,17,27
188,32,31,97,6,68
189,59,79,9,11,78


In [60]:
randomDf1.index

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       190, 191, 192, 193, 194, 195, 196, 197, 198, 199],
      dtype='int64', length=200)

In [61]:
randomDf1.columns

RangeIndex(start=0, stop=5, step=1)

In [63]:
randomDf1.to_numpy()

array([[  6,  84,  17,  90,  46],
       [ 67,  28,  98,  85,  30],
       [ 40,  23,  35,  14,  76],
       [ 70,  34,  94,  19,  89],
       [ 86,   4,  20,  95,  71],
       [ 79,  18,  57,  91,  69],
       [ 87,   9,  59,  99,  31],
       [ 85,  89,  99,  35, 100],
       [ 16,  28,  88,  62, 100],
       [  8,  30,  67,  25,  16],
       [ 24,  29,  94,  95,  68],
       [ 63,  54,  71,  62,  18],
       [ 55,  56,  30,  27,  17],
       [ 88,  37,  83,  75,  97],
       [ 20,  73,  58,  34,  84],
       [ 87,  45,  48,   1,  74],
       [  9,   5,  20,  72,  85],
       [ 88,  53,  53,  82,  20],
       [ 33,  22,  70,  73,  77],
       [ 84,  43,  96,  15,  52],
       [  8,  34,  60,  41,  17],
       [ 61,  83,  18,  66,  71],
       [ 40,  80,  93,  63,  25],
       [ 67,  48,  18,   8,  96],
       [ 23,  24,  43,  22,  71],
       [ 47,  67,  31,   8,  17],
       [ 56,  99,   6,  17,  22],
       [ 94,  66,  95,  10,  69],
       [  1,  61,  87,   7,  90],
       [ 98,  

In [66]:
s1 = pd.Series([10,20,30], index=['a', 'b', 'c'])

In [67]:
s1

a    10
b    20
c    30
dtype: int64

In [68]:
s1['a']

10

In [83]:
df1= pd.DataFrame([
    ['Tom', 28],
    ['Jerry', 31]
], columns=['Name', 'Age'])

In [71]:
df1

Unnamed: 0,Name,Age
0,Tom,28
1,Jerry,31


In [84]:
df1['City'] = ['Delhi', 'Mumbai']

In [73]:
df1

Unnamed: 0,Name,Age,City
0,Tom,28,Delhi
1,Jerry,31,Mumbai


In [85]:
# axis 
# 0 - row (default)
# 1 - column
df1.drop('Age', axis=1) # view

Unnamed: 0,Name,City
0,Tom,Delhi
1,Jerry,Mumbai


In [86]:
df1

Unnamed: 0,Name,Age,City
0,Tom,28,Delhi
1,Jerry,31,Mumbai


In [87]:
df1.drop('Age', axis=1, inplace=True)

In [88]:
df1

Unnamed: 0,Name,City
0,Tom,Delhi
1,Jerry,Mumbai


In [89]:
df1['FavFood'] = ['Idli', 'Dosa']

In [90]:
df1

Unnamed: 0,Name,City,FavFood
0,Tom,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [91]:
df1[['Name', 'City']]

Unnamed: 0,Name,City
0,Tom,Delhi
1,Jerry,Mumbai


In [97]:
randomDf1.sort_index(axis = 1, ascending=False)

Unnamed: 0,4,3,2,1,0
0,46,90,17,84,6
1,30,85,98,28,67
2,76,14,35,23,40
3,89,19,94,34,70
4,71,95,20,4,86
...,...,...,...,...,...
195,51,89,59,96,19
196,55,9,48,58,84
197,73,78,90,47,37
198,53,61,99,10,56


In [98]:
randomDf1.sort_index(axis = 0, ascending=False)

Unnamed: 0,0,1,2,3,4
199,67,47,29,91,7
198,56,10,99,61,53
197,37,47,90,78,73
196,84,58,48,9,55
195,19,96,59,89,51
...,...,...,...,...,...
4,86,4,20,95,71
3,70,34,94,19,89
2,40,23,35,14,76
1,67,28,98,85,30


In [102]:
df1['Name']

0      Tom
1    Jerry
Name: Name, dtype: object

In [104]:
df1

Unnamed: 0,Name,City,FavFood
0,Tom,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [107]:
df1.columns = ["apple", "ball", "cat"]

In [108]:
df1

Unnamed: 0,apple,ball,cat
0,Tom,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [109]:
df1.columns = ["Name",	"City",	"FavFood"]

In [110]:
df1

Unnamed: 0,Name,City,FavFood
0,Tom,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [111]:
df1['Name'][0]

'Tom'

In [112]:
# Not a recommended way of updation
df1['Name'][0] = 'Mr Tom'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df1['Name'][0] = 'Mr Tom'


In [113]:
df1['Name'][0]

'Mr Tom'

In [114]:
df1

Unnamed: 0,Name,City,FavFood
0,Mr Tom,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [120]:
# recommended way to update
df1.loc[0, 'Name'] = 'TOM'

In [121]:
df1

Unnamed: 0,Name,City,FavFood
0,TOM,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [122]:
randomDf

Unnamed: 0,0,1,2,3,4
0,61,95,34,81,50
1,40,34,57,98,88
2,32,73,72,49,7
3,44,71,44,39,62
4,58,64,55,46,61


In [127]:
randomDf.loc[(randomDf[0] < 50) & (randomDf[3] < 50) ]

Unnamed: 0,0,1,2,3,4
2,32,73,72,49,7
3,44,71,44,39,62


In [128]:
randomDf

Unnamed: 0,0,1,2,3,4
0,61,95,34,81,50
1,40,34,57,98,88
2,32,73,72,49,7
3,44,71,44,39,62
4,58,64,55,46,61


In [129]:
randomDf.iloc[2,2]

72

In [130]:
df1

Unnamed: 0,Name,City,FavFood
0,TOM,Delhi,Idli
1,Jerry,Mumbai,Dosa


In [133]:
df1.loc[0,'City']

'Delhi'

In [None]:
df1.loc[0,'City']