### Series

In [1]:
import pandas as pd

In [2]:
prices = pd.Series([100,140,150,60,90])

In [3]:
type(prices)

pandas.core.series.Series

In [4]:
prices.index

RangeIndex(start=0, stop=5, step=1)

In [5]:
prices = pd.Series([100,140,150,60,90], index=['p1','p2','p3','p4','p5'])

In [6]:
prices.index

Index(['p1', 'p2', 'p3', 'p4', 'p5'], dtype='object')

In [7]:
prices.values

array([100, 140, 150,  60,  90], dtype=int64)

In [8]:
s1 = pd.Series([100,'Abc',200])

In [9]:
s1.values

array([100, 'Abc', 200], dtype=object)

In [10]:
s1.loc[0], s1.iloc[0]

(100, 100)

In [11]:
prices.mean()

108.0

In [12]:
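# Not possible: s1 mixes ints and a string (object dtype), so a numeric mean is undefined
# and the call below would raise an error.
# s1.mean()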

In [13]:
prices.loc['p3']

150

In [14]:
prices.iloc[0]

100

In [15]:
prices.head(3)

p1    100
p2    140
p3    150
dtype: int64

In [16]:
prices.tail(3)

p3    150
p4     60
p5     90
dtype: int64

In [17]:
prices.is_unique

True

In [18]:
summary = prices.agg(['average','sum','min','max'])

In [19]:
summary

average    108.0
sum        540.0
min         60.0
max        150.0
dtype: float64

In [20]:
type(summary)

pandas.core.series.Series

In [21]:
prices.describe()

count      5.000000
mean     108.000000
std       37.013511
min       60.000000
25%       90.000000
50%      100.000000
75%      140.000000
max      150.000000
dtype: float64

In [22]:
sales = pd.Series([20,35,45])

In [23]:
sales.cumsum()

0     20
1     55
2    100
dtype: int64

In [24]:
sales.pct_change()

0         NaN
1    0.750000
2    0.285714
dtype: float64

#### Using apply() function 

In [25]:
def classify_price(price):
    """Label a price as 'High' or 'Low'.

    Anything strictly greater than 100 is 'High'; 100 and below is 'Low'.
    """
    if price > 100:
        return 'High'
    return 'Low'

prices.apply(classify_price)

p1     Low
p2    High
p3    High
p4     Low
p5     Low
dtype: object

In [26]:
# Using a lambda: render each price as an integer padded to width 5 (format spec `5d`)
# followed by its label in parentheses ('High' when strictly above 100, else 'Low').
prices.apply(lambda price : f"{price:5d} - ({'High' if price > 100 else 'Low'})")

p1       100 - (Low)
p2      140 - (High)
p3      150 - (High)
p4        60 - (Low)
p5        90 - (Low)
dtype: object

In [27]:
prices.sort_values(ascending=False).head(3)

p3    150
p2    140
p1    100
dtype: int64

In [28]:
print(prices)
prices.rank()

p1    100
p2    140
p3    150
p4     60
p5     90
dtype: int64


p1    3.0
p2    4.0
p3    5.0
p4    1.0
p5    2.0
dtype: float64

In [29]:
langs = pd.Series(['Java','Python','Python','Java','C#'], index=[101,102,103,104,105])

In [30]:
langs

101      Java
102    Python
103    Python
104      Java
105        C#
dtype: object

In [31]:
uv = langs.unique()

In [32]:
type(uv)

numpy.ndarray

In [33]:
langs.value_counts()

Java      2
Python    2
C#        1
dtype: int64

In [34]:
langs.mode()[0]

'Java'

## Process marks.csv

In [35]:
marks = pd.read_csv("marks.csv")

In [36]:
marks.head()

Unnamed: 0,Rollno,Subject,Marks
0,1,DS,70
1,2,DS,80
2,3,DS,95
3,1,C,80
4,2,C,90


In [37]:
marks.shape

(9, 3)

In [38]:
marks.columns

Index(['Rollno', 'Subject', 'Marks'], dtype='object')

In [39]:
marks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
Rollno     9 non-null int64
Subject    9 non-null object
Marks      9 non-null int64
dtypes: int64(2), object(1)
memory usage: 296.0+ bytes


In [40]:
marks.describe(include="all")

Unnamed: 0,Rollno,Subject,Marks
count,9.0,9,9.0
unique,,5,
top,,C,
freq,,3,
mean,2.0,,82.333333
std,0.866025,,7.842194
min,1.0,,70.0
25%,1.0,,80.0
50%,2.0,,80.0
75%,3.0,,90.0


In [41]:
marks.Marks

0    70
1    80
2    95
3    80
4    90
5    76
6    90
7    80
8    80
Name: Marks, dtype: int64

In [42]:
marks['Marks']

0    70
1    80
2    95
3    80
4    90
5    76
6    90
7    80
8    80
Name: Marks, dtype: int64

In [43]:
m = marks['Marks']

In [44]:
type(m)

pandas.core.series.Series

In [45]:
marks['Marks'].describe()

count     9.000000
mean     82.333333
std       7.842194
min      70.000000
25%      80.000000
50%      80.000000
75%      90.000000
max      95.000000
Name: Marks, dtype: float64

In [46]:
marks[['Subject','Marks']]

Unnamed: 0,Subject,Marks
0,DS,70
1,DS,80
2,DS,95
3,C,80
4,C,90
5,C,76
6,Java,90
7,C#,80
8,Python,80


In [47]:
marks[:3]

Unnamed: 0,Rollno,Subject,Marks
0,1,DS,70
1,2,DS,80
2,3,DS,95


#### Read countries info into a DataFrame

In [48]:
import requests
from io import StringIO

# NOTE(review): confirm this endpoint is still being served before relying on it.
# A timeout keeps the cell from hanging forever if the server never answers.
resp = requests.get("https://restcountries.eu/rest/v2/all", timeout=30)
# Fail loudly on an HTTP error instead of handing an error page to the JSON parser.
resp.raise_for_status()
# Wrap the payload in a file-like object: passing a literal JSON string to
# read_json is deprecated in recent pandas releases, while a buffer is accepted
# by old and new versions alike.
cdf = pd.read_json(StringIO(resp.text))

In [49]:
cdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 24 columns):
alpha2Code        250 non-null object
alpha3Code        250 non-null object
altSpellings      250 non-null object
area              240 non-null float64
borders           250 non-null object
callingCodes      250 non-null object
capital           250 non-null object
cioc              247 non-null object
currencies        250 non-null object
demonym           250 non-null object
flag              250 non-null object
gini              153 non-null float64
languages         250 non-null object
latlng            250 non-null object
name              250 non-null object
nativeName        250 non-null object
numericCode       249 non-null float64
population        250 non-null int64
region            250 non-null object
regionalBlocs     250 non-null object
subregion         250 non-null object
timezones         250 non-null object
topLevelDomain    250 non-null object
translations      2

In [50]:
cdf['region'].unique()

array(['Asia', 'Europe', 'Africa', 'Oceania', 'Americas', 'Polar', ''],
      dtype=object)

In [51]:
cdf.dtypes

alpha2Code         object
alpha3Code         object
altSpellings       object
area              float64
borders            object
callingCodes       object
capital            object
cioc               object
currencies         object
demonym            object
flag               object
gini              float64
languages          object
latlng             object
name               object
nativeName         object
numericCode       float64
population          int64
region             object
regionalBlocs      object
subregion          object
timezones          object
topLevelDomain     object
translations       object
dtype: object

In [52]:
cdf[ ['area','population']].describe()

Unnamed: 0,area,population
count,240.0,250.0
mean,626210.7,29396550.0
std,1950028.0,124457800.0
min,0.44,0.0
25%,1533.25,202903.0
50%,71006.5,4648734.0
75%,414643.2,18039550.0
max,17124440.0,1377422000.0


### Selection

In [53]:
marks.loc[:4, ['Subject','Marks']]

Unnamed: 0,Subject,Marks
0,DS,70
1,DS,80
2,DS,95
3,C,80
4,C,90


In [54]:
marks.loc[:4, :]

Unnamed: 0,Rollno,Subject,Marks
0,1,DS,70
1,2,DS,80
2,3,DS,95
3,1,C,80
4,2,C,90


In [55]:
marks.iloc[4,0]

2

In [56]:
marks.isin(['C','Java'])

Unnamed: 0,Rollno,Subject,Marks
0,False,False,False
1,False,False,False
2,False,False,False
3,False,True,False
4,False,True,False
5,False,True,False
6,False,True,False
7,False,False,False
8,False,False,False


In [57]:
marks[marks['Marks'] > 80][['Subject','Marks']]

Unnamed: 0,Subject,Marks
2,DS,95
4,C,90
6,Java,90


In [58]:
marks.loc[marks['Marks'] > 80, ['Subject','Marks']]

Unnamed: 0,Subject,Marks
2,DS,95
4,C,90
6,Java,90


In [59]:
marks.loc[(marks['Marks'] > 80)  & ( marks['Subject'] == 'DS'), ['Subject','Marks']]

Unnamed: 0,Subject,Marks
2,DS,95


In [60]:
# Reusable boolean row masks, written with the method forms of the comparisons.
highmarks = marks['Marks'].gt(80)       # True where Marks is strictly above 80
dssubject = marks['Subject'].eq('DS')   # True where Subject is exactly 'DS'

In [61]:
dssubject

0     True
1     True
2     True
3    False
4    False
5    False
6    False
7    False
8    False
Name: Subject, dtype: bool

In [62]:
marks.loc[(highmarks & dssubject), :]

Unnamed: 0,Rollno,Subject,Marks
2,3,DS,95


In [63]:
# Boolean indexing on both axes: one flag per row and one flag per column.
# The row mask must contain exactly len(marks) entries (a shorter list raises
# IndexError in current pandas), so it is built to size: keep only the first row.
first_row_only = [pos == 0 for pos in range(len(marks))]
# Column mask: keep the 1st and 3rd columns, drop the 2nd.
marks.loc[first_row_only, [True, False, True]]

Unnamed: 0,Rollno,Marks
0,1,70


In [64]:
subs = marks['Subject']

In [65]:
subs.str.startswith('D')

0     True
1     True
2     True
3    False
4    False
5    False
6    False
7    False
8    False
Name: Subject, dtype: bool

In [66]:
marks[marks['Subject'].str.startswith('D')]

Unnamed: 0,Rollno,Subject,Marks
0,1,DS,70
1,2,DS,80
2,3,DS,95


In [67]:
marks['Subject'].str.upper()

0        DS
1        DS
2        DS
3         C
4         C
5         C
6      JAVA
7        C#
8    PYTHON
Name: Subject, dtype: object

In [68]:
marks.columns.str.upper().str.contains('R')

array([ True, False,  True])

In [69]:
marks

Unnamed: 0,Rollno,Subject,Marks
0,1,DS,70
1,2,DS,80
2,3,DS,95
3,1,C,80
4,2,C,90
5,3,C,76
6,1,Java,90
7,2,C#,80
8,3,Python,80


In [75]:
def fun(s):
    """Return the result of calling ``s.count()`` with no arguments.

    Presumably ``s`` is a pandas Series or DataFrame (where ``count()`` reports
    non-null entries) -- no call site is visible here, so confirm against callers.
    """
    total = s.count()
    return total

def format(v):
    """Return ``v`` rendered as text with a ``'value : '`` prefix.

    NOTE: this name shadows Python's builtin ``format`` for the rest of the
    session; the template below uses the ``str.format`` method, not this function.
    """
    template = 'value : {}'
    return template.format(v)


In [82]:
# Format every individual cell of the frame with an element-wise function.
# Alternative using the named formatter defined earlier (emits 'value : <v>' instead):
# marks.applymap(format)
# NOTE(review): DataFrame.applymap is deprecated in newer pandas (2.1+) in favour of
# DataFrame.map -- switch once the environment is upgraded.
marks.applymap(lambda v : f"Value = {v}")

Unnamed: 0,Rollno,Subject,Marks
0,Value = 1,Value = DS,Value = 70
1,Value = 2,Value = DS,Value = 80
2,Value = 3,Value = DS,Value = 95
3,Value = 1,Value = C,Value = 80
4,Value = 2,Value = C,Value = 90
5,Value = 3,Value = C,Value = 76
6,Value = 1,Value = Java,Value = 90
7,Value = 2,Value = C#,Value = 80
8,Value = 3,Value = Python,Value = 80
