In [1]:
import pandas as pd

## ![image.png](attachment:9ade39bd-f79c-420e-a579-54e9dc0c6ebb.png)

>`read_html()` : reads every table in an HTML web page and returns those tables as a list of DataFrames

In [2]:
# Fetch the page over the network and parse its tables; the result is a list
# (see the type check in the next cell), one entry per table found.
l = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2015_totals.html')

In [3]:
# Keep the first table returned by read_html for inspection below.
df1 = l[0]
type(l)  # shows that read_html returned a plain Python list

list

In [4]:
df1.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Quincy Acy,PF,24,NYK,68,22,1287,152,331,...,0.784,79,222,301,68,27,22,60,147,398
1,2,Jordan Adams,SG,20,MEM,30,0,248,35,86,...,0.609,9,19,28,16,16,7,14,24,94
2,3,Steven Adams,C,21,OKC,70,67,1771,217,399,...,0.502,199,324,523,66,38,86,99,222,537
3,4,Jeff Adrien,PF,28,MIN,17,0,215,19,44,...,0.579,23,54,77,15,4,9,9,30,60
4,5,Arron Afflalo,SG,29,TOT,78,72,2502,375,884,...,0.843,27,220,247,129,41,7,116,167,1035


<br></br>
>`Series()` : used to create a Series object

In [5]:
name = pd.Series(['Ankan', 'Akash', 'Aritra', 'Anwesa', 'Aditi'])

In [6]:
type( name )

pandas.core.series.Series

<br></br>
>`DataFrame()` : used to create a DataFrame object

In [7]:
df = pd.DataFrame({
    'Name' : ['Ankan', 'Akash', 'Aritra', 'Anwesa', 'Aditi'],
    'Age' : [22, 16, 14, 19, 17],
    'Sex' : ['male', 'male', 'male', 'female', 'female']
})

In [8]:
df

Unnamed: 0,Name,Age,Sex
0,Ankan,22,male
1,Akash,16,male
2,Aritra,14,male
3,Anwesa,19,female
4,Aditi,17,female


In [9]:
type(df)

pandas.core.frame.DataFrame

<br></br>
> `to_csv()` : stores the dataframe in a csv file

In [10]:
df.to_csv('person_data.csv', index=False)

<br></br>
> `read_csv()` : reads a CSV file and converts it to a DataFrame object

In [11]:
# Read back the file written by the to_csv() cell above. The path must match
# the one used there ('person_data.csv'), otherwise a fresh
# Restart & Run All fails with FileNotFoundError.
df = pd.read_csv('person_data.csv')
df.head()

Unnamed: 0,Name,Age,Sex
0,Ankan,22,male
1,Akash,16,male
2,Aritra,14,male
3,Anwesa,19,female
4,Aditi,17,female


<br></br>
> `to_datetime()` : converts its argument to a datetime object

In [12]:
pd.to_datetime('2023-02-03')

Timestamp('2023-02-03 00:00:00')

---
<br></br>
## ![image.png](attachment:d976af75-9c82-4796-b7db-6f97e6c8b6d2.png)

In [13]:
df = pd.DataFrame({
    'A' : [12, 34, 22, 12, 43, 23], 
    'B' : [10, 24, 23, 45, 98, 45], 
    'C' : [98, 76, 87, 45, 76, 46]
})

In [14]:
df

Unnamed: 0,A,B,C
0,12,10,98
1,34,24,76
2,22,23,87
3,12,45,45
4,43,98,76
5,23,45,46


In [15]:
def reindex( df ) :
    ''' relabel the rows of df in place with consecutive odd numbers 1, 3, 5, ... ; returns nothing '''
    row_count = len(df)
    # stop is exclusive, so 2 * row_count yields exactly row_count odd labels
    odd_labels = pd.RangeIndex(start=1, stop=2 * row_count, step=2)
    df.index = odd_labels
    

In [16]:
reindex( df )

In [17]:
df # updated dataframe

Unnamed: 0,A,B,C
1,12,10,98
3,34,24,76
5,22,23,87
7,12,45,45
9,43,98,76
11,23,45,46


---
<br></br>
## ![image.png](attachment:2952b626-b0d7-408a-86a4-978374047224.png)

In [20]:
df = pd.DataFrame({ 'Values' : [10, 20, 30, 40, 50] })

In [24]:
def print_first_three_sum( df : pd.DataFrame, column = 'Values') :
    ''' print the sum of the first three rows of one column

    df     : frame holding the column to sum
    column : name of the column to sum (defaults to 'Values')

    Nothing is returned; the result is written to stdout. If the frame has
    fewer than three rows, the rows that exist are summed.
    '''
    # .iloc makes the "first three rows" selection explicitly positional,
    # independent of whatever labels the index carries.
    res = df[column].iloc[:3].sum()
    # Report the column that was actually summed instead of a fixed name.
    print(f'the sum of 1st 3 rows in {column} column : ', res)
        

In [25]:
print_first_three_sum( df )

the sum of 1st 3 rows in Values column :  60


---
<br></br>
## ![image.png](attachment:833290c3-a313-45e5-8d67-385eba186f7e.png)

In [26]:
df = pd.DataFrame({ 'Text' : ['I am Ankan', 'I love AI', 'Statistics is a piller of Machine Learning'] })

In [27]:
df

Unnamed: 0,Text
0,I am Ankan
1,I love AI
2,Statistics is a piller of Machine Learning


In [28]:
# Count whitespace-separated words per row with the vectorised .str accessor;
# unlike apply() with a lambda, a missing Text value yields NaN instead of raising.
df['Word_Count'] = df['Text'].str.split().str.len()

In [29]:
df

Unnamed: 0,Text,Word_Count
0,I am Ankan,3
1,I love AI,3
2,Statistics is a piller of Machine Learning,7


---
<br></br>
## ![image.png](attachment:9200a079-c7f6-4086-b4f5-72e8f77891b4.png)

In [30]:
df

Unnamed: 0,Text,Word_Count
0,I am Ankan,3
1,I love AI,3
2,Statistics is a piller of Machine Learning,7


>`DataFrame.shape` : an attribute (not a method) that returns a tuple of two values: first the number of rows, then the number of columns in the DataFrame.

In [31]:
df.shape

(3, 2)

>`DataFrame.size` : an attribute (not a method) that returns the total number of values (including NaN or empty ones) in the DataFrame, i.e. (number of rows) * (number of columns).

In [32]:
df.size

6

---
<br></br>
## ![image.png](attachment:3fa4ec3b-1c96-442d-b90b-f58158d46d3f.png)

We use the `read_excel()` function in pandas to read an Excel file.

---
<br></br>
## ![image.png](attachment:805294bb-c188-408d-a4d8-952c7d08fe0e.png)

In [33]:
df = pd.DataFrame({ 'Email' : [ 'john.doe@example.com', 'ankan@gmail.com', 'akash@gmail.com', 'aritra@rediffmail.com' ] }) 

In [34]:
df

Unnamed: 0,Email
0,john.doe@example.com
1,ankan@gmail.com
2,akash@gmail.com
3,aritra@rediffmail.com


In [35]:
def set_username( df : pd.DataFrame ) :
    ''' add a 'Username' column holding the part of each 'Email' before the first '@'

    df : frame with an 'Email' column; it is modified in place and nothing
         is returned.

    An address without '@' is copied unchanged, and a missing email gives a
    missing username instead of raising.
    '''
    # The vectorised .str accessor propagates missing values; n=1 stops
    # splitting after the first '@' because only the leading part is needed.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]

In [36]:
set_username( df )

In [37]:
df

Unnamed: 0,Email,Username
0,john.doe@example.com,john.doe
1,ankan@gmail.com,ankan
2,akash@gmail.com,akash
3,aritra@rediffmail.com,aritra


---
<br></br>
## ![image.png](attachment:e385f2e9-52b4-4e97-9a17-375490639df8.png) ![image.png](attachment:910da383-017c-403c-9710-b26d0636ca01.png)

In [38]:
df = pd.DataFrame({ 'A' : [3, 8, 6, 2, 9], 'B' : [5, 2, 9, 3, 1], 'C' : [1, 7, 4, 5, 2] })

In [39]:
df

Unnamed: 0,A,B,C
0,3,5,1
1,8,2,7
2,6,9,4
3,2,3,5
4,9,1,2


In [40]:
def select_rows( df : pd.DataFrame, condition : 'pd.Series | None' = None ) :
    ''' return the rows of df for which condition is True

    df        : frame to filter
    condition : boolean mask aligned with df; when omitted, the mask
                (A > 5) & (B < 10) is built from the frame that was passed in.

    The default mask is computed inside the function on purpose: a default
    written in the signature is evaluated only once, at definition time,
    against whatever global frame exists then, and would be stale for any
    other frame.
    '''
    if condition is None :
        condition = (df['A'] > 5) & (df['B'] < 10)
    return df[condition]

In [41]:
new_df = select_rows(df)

new_df

Unnamed: 0,A,B,C
1,8,2,7
2,6,9,4
4,9,1,2


---
<br></br>
## ![image.png](attachment:5cd68e3b-4c9e-4da9-bc95-f1105e06f263.png)

In [42]:
df = pd.DataFrame({'Values' : [1, 5, 3, 7, 8, 4, 8]})

In [43]:
df

Unnamed: 0,Values
0,1
1,5
2,3
3,7
4,8
5,4
6,8


In [44]:
def sr_mean( series) :
    ''' return the arithmetic mean of the given series (delegates to series.mean()) '''
    average = series.mean()
    return average

def sr_median( series) :
    ''' return the median of the given series (delegates to series.median()) '''
    middle = series.median()
    return middle

def sr_std( series) :
    ''' return the standard deviation of the given series (delegates to series.std(), whose default is the sample deviation, ddof=1) '''
    spread = series.std()
    return spread

In [45]:
sr_mean( df.Values )

5.142857142857143

In [46]:
sr_median( df.Values )

5.0

In [47]:
sr_std( df.Values )

2.6726124191242437

---
<br></br>
## ![image.png](attachment:d75107f6-02a2-4cb3-bdad-665f8757ec3c.png)

In [48]:
df = pd.DataFrame({
    'Sales' : [ 16, 21, 27, 18, 29, 30, 21, 25, 34, 27, 19, 24, 20, 23, 25],
    'Date': pd.date_range('2023-02-16', periods=15)
})

In [49]:
def moving_averagee_in_week( df : pd.DataFrame, column : str = 'Sales', window : int = 7 ) :
    ''' add a 'MovingAverage' column holding the rolling mean of one column

    df     : frame to extend; it is modified in place and nothing is returned
    column : name of the numeric column to average (defaults to 'Sales')
    window : number of consecutive rows per average (defaults to 7, one week
             of daily rows)

    The first window - 1 rows have no complete window and are left as NaN.
    '''
    # Roll over the selected column only. Rolling over the whole frame would
    # also try to average the Date column (and, on a second call, the
    # MovingAverage column itself) and cannot be assigned to a single column.
    df['MovingAverage'] = df[column].rolling( window=window ).mean()

In [50]:
moving_averagee_in_week( df )

In [51]:
df

Unnamed: 0,Sales,Date,MovingAverage
0,16,2023-02-16,
1,21,2023-02-17,
2,27,2023-02-18,
3,18,2023-02-19,
4,29,2023-02-20,
5,30,2023-02-21,
6,21,2023-02-22,23.142857
7,25,2023-02-23,24.428571
8,34,2023-02-24,26.285714
9,27,2023-02-25,26.285714


Assume the current date is 2023-03-02.

---
<br></br>
## ![image.png](attachment:79174774-d873-4c85-ac5e-84d6c2a0b679.png)

In [52]:
df = pd.DataFrame({'Date' : ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']})

In [53]:
df

Unnamed: 0,Date
0,2023-01-01
1,2023-01-02
2,2023-01-03
3,2023-01-04
4,2023-01-05


In [54]:
def weekday_calculate( df : pd.DataFrame ) :
    ''' add a 'Weekday' column with the English day name of each 'Date'

    df : frame with a 'Date' column; it is modified in place and nothing is
         returned. If 'Date' is not already a datetime column it is converted
         with pd.to_datetime first.
    '''
    # Test for "not datetime yet" rather than for object dtype, so columns
    # stored with a dedicated string dtype are converted as well.
    if not pd.api.types.is_datetime64_any_dtype(df['Date']) :
        df['Date'] = pd.to_datetime(df['Date'])

    # day_name() returns English names by default, independent of the
    # process locale.
    df['Weekday'] = df['Date'].dt.day_name()

In [55]:
weekday_calculate( df )

In [56]:
df

Unnamed: 0,Date,Weekday
0,2023-01-01,Sunday
1,2023-01-02,Monday
2,2023-01-03,Tuesday
3,2023-01-04,Wednesday
4,2023-01-05,Thursday


---
<br></br>
## ![image.png](attachment:ac5dcbd6-8322-461f-ad56-90ea23049924.png)

In [57]:
df = pd.DataFrame({
    'Date' : ['2022-12-25', '2022-12-30', '2023-01-01', '2023-01-03',
              '2023-01-05', '2023-01-10', '2023-01-14', '2023-01-25', 
              '2023-01-30', '2023-01-31', '2023-02-05', '2023-02-06']
})

In [58]:
def select_rows_between_date(df, start='2023-01-01', end='2023-01-31') :
    ''' return the rows of df whose 'Date' lies between start and end (both inclusive)

    df    : frame with a 'Date' column holding datetimes or date strings
    start : first date to keep (defaults to '2023-01-01')
    end   : last date to keep (defaults to '2023-01-31')

    The returned frame carries 'Date' as datetime values. The frame passed
    in is left untouched; the conversion happens on a copy of the column.
    '''
    dates = df['Date']
    # Convert anything that is not a datetime column yet (object or string
    # dtype) so that the comparison is chronological, not textual.
    if not pd.api.types.is_datetime64_any_dtype(dates) :
        dates = pd.to_datetime(dates)

    # Series.between includes both end points by default.
    in_range = dates.between(start, end)
    return df.assign(Date=dates)[in_range]
    

In [59]:
select_rows_between_date( df )

Unnamed: 0,Date
2,2023-01-01
3,2023-01-03
4,2023-01-05
5,2023-01-10
6,2023-01-14
7,2023-01-25
8,2023-01-30
9,2023-01-31


---
<br></br>
## ![image.png](attachment:0b1dbaa0-f2d4-48ca-a92e-9b91ac8c42a3.png)

we have to import `pandas`

In [60]:
import pandas

---