# Ex 1: How to import pandas and check the version?
Q: As a warm up, we will import pandas and print it’s version

In [3]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [4]:
print(pd.__version__)

1.4.2


# Ex 2: How to create a series from a list, numpy array and dict?
Q: Create a pandas series from each of the items below: a list, numpy and a dictionary and print the first 5 elements.

In [9]:
mylist=list('abcdefghijklmnopqrstuvwxyz')
my_arr=np.arange(25)
my_dict=dict(zip(mylist,my_arr))

In [15]:
my_list=pd.Series(mylist)
my_list.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [17]:
my_arr=pd.Series(my_arr)
my_arr.head()

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [18]:
my_dict=pd.Series(my_dict)
my_dict.head()

a    0
b    1
c    2
d    3
e    4
dtype: int32

# Ex 3: How to convert the index of a series into a column of a dataframe?
Q: Convert the series ser into a dataframe with its index as another column on the dataframe.

In [34]:
my_list=list('abcdefghijklmnopqrstuvwxyz')
my_array=np.arange(26)
my_dict=dict(zip(my_list,my_array))
ser=pd.Series(my_dict)
ser.head()

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [35]:
ser.to_frame().reset_index().head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,d,3
4,e,4


# Ex 4: How to combine many series to form a dataframe?
Q: Combine ser1 and ser2 to form a dataframe.

In [37]:
ser1=pd.Series(list('abcdefghijklmnopqrstuvqxyz'),name='ser1')
ser2=pd.Series(np.arange(25),name='ser2')

In [50]:
ser3=pd.concat((ser1,ser2),axis=1)

In [51]:
ser3.head()

Unnamed: 0,ser1,ser2
0,a,0.0
1,b,1.0
2,c,2.0
3,d,3.0
4,e,4.0


In [52]:
#2nd Method

In [60]:
pd.DataFrame({'ser1':ser1,'ser2':ser2}).head()

Unnamed: 0,ser1,ser2
0,a,0.0
1,b,1.0
2,c,2.0
3,d,3.0
4,e,4.0


# Ex 5: How to assign name to the series’ index?
Q: Give a name to the series ser calling it ‘alphabets’.

In [65]:
ser=pd.Series(list('abcdefghijklmnopqrtuvwxyz'))
ser.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [67]:
ser.name='abplhabets'

In [69]:
ser.head()

0    a
1    b
2    c
3    d
4    e
Name: abplhabets, dtype: object

# Ex 6: How to get the items of series A not present in series B?
Q: From ser1 remove items present in ser2.

In [70]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [77]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

# Ex 7: How to get the items not common to both series A and series B?
Q: Get all items of ser1 and ser2 not common to both.

In [79]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [81]:
unique1=ser1[~ser1.isin(ser2)]
unique1

0    1
1    2
2    3
dtype: int64

In [82]:
unique2=ser2[~ser2.isin(ser1)]
unique2

2    6
3    7
4    8
dtype: int64

In [85]:
uniques=pd.Series(np.union1d(unique1,unique2))
uniques

0    1
1    2
2    3
3    6
4    7
5    8
dtype: int64

# Ex 8: How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
Q: Compute the minimum, 25th percentile, median, 75th, and maximum of ser.

In [96]:
ser=pd.Series(np.random.normal(10,5,25))
ser.head()

0    12.663139
1     4.594816
2    12.829877
3    12.265178
4     5.492681
dtype: float64

In [98]:
ser.min()

-1.125797035215614

In [99]:
ser.median()

11.828958638377129

In [100]:
ser.quantile()

11.828958638377129

In [101]:
ser.max()

24.86390601139366

In [105]:
ser.quantile(.75)

13.907216566220363

# Ex 9: How to get frequency counts of unique items of a series?
Q: Calculate the frequency counts of each unique value ser.

In [114]:
ser=pd.Series(np.take(list('abcdefgh'),np.random.randint(8,size=28)))


In [116]:
ser.head()

0    a
1    c
2    f
3    a
4    h
dtype: object

In [118]:
ser.value_counts()

h    9
c    5
a    4
f    4
b    3
d    2
e    1
dtype: int64

# Ex 10: How to keep only the top 2 most frequent values as it is and replace everything else as ‘Other’?
Q: From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.

In [128]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

In [129]:
ser

0     1
1     3
2     2
3     3
4     3
5     3
6     3
7     4
8     4
9     1
10    3
11    1
dtype: int32

In [130]:
most_freq=ser.value_counts()[:2].index

In [132]:
ser[~ser.isin(most_freq)]='other'

In [133]:
ser

0         1
1         3
2     other
3         3
4         3
5         3
6         3
7     other
8     other
9         1
10        3
11        1
dtype: object

# Ex 11: How to bin a numeric series to 10 groups of equal size?
Q: Bin the series ser into 10 equal deciles and replace the values with the bin name.

In [150]:
ser = pd.Series(np.random.randint(1,21,20))

In [156]:
ser

0     13
1      6
2     13
3      7
4     13
5     15
6     18
7     19
8     13
9     16
10    12
11    17
12    15
13    14
14     1
15     9
16     5
17     4
18    13
19    19
dtype: int32

In [155]:
pd.cut(ser,bins=10,labels=['1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th'])

0      7th
1      3rd
2      7th
3      4th
4      7th
5      8th
6     10th
7     10th
8      7th
9      9th
10     7th
11     9th
12     8th
13     8th
14     1st
15     5th
16     3rd
17     2nd
18     7th
19    10th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

# Ex 12: How to convert a numpy array to a dataframe of given shape? (L1)
Q: Reshape the series ser into a dataframe with 7 rows and 5 columns

In [139]:
a=pd.Series(np.arange(35))

In [141]:
a.head()

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [142]:
a.values.reshape(7,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34]])

# Ex 13: How to find the positions of numbers that are multiples of 3 from a series?
Q: Find the positions of numbers that are multiples of 3 from ser.

In [157]:
ser=pd.Series(np.random.randint(1,10,7))

In [158]:
ser

0    5
1    1
2    6
3    9
4    8
5    4
6    1
dtype: int32

In [159]:
pd.Series(ser[ser%3==0].index)

0    2
1    3
dtype: int64

# Ex 14: How to extract items at given positions from a series
Q: From ser, extract the items at positions in list pos.

In [162]:
ser=pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos=[0,4,8,14,20]

In [163]:
ser.take(pos)

0     a
4     e
8     i
14    o
20    u
dtype: object

In [167]:
#2nd Method

In [166]:
pd.Series(ser.iloc[pos])

0     a
4     e
8     i
14    o
20    u
dtype: object

# Ex 15: How to stack two series vertically?

Q: Stack ser1 and ser2 vertically to form a dataframe.

In [170]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

In [171]:
ser1

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [172]:
ser2

0    a
1    b
2    c
3    d
4    e
dtype: object

In [177]:
s=pd.concat([ser1,ser2])

In [178]:
s

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

# Ex 16: How to get the positions of items of series A in another series B?
Q: Get the positions of items of ser2 in ser1 as a list

In [179]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

In [187]:
list(ser1[ser1.isin(ser2)].index)

[0, 4, 5, 8]

# Ex 17: How to compute the mean squared error on series A and predicted series B?
Q: Compute the mean squared error of truth and pred series.

In [188]:
truth = pd.Series(range(10))
pred = pd.Series(range(10)) + np.random.random(10)

# Ex 18: How to convert the first character of each element in a series to uppercase?
Q: Change the first character of each word to upper case in each word of ser.

In [192]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

In [193]:
ser

0     how
1      to
2    kick
3    ass?
dtype: object

In [201]:
ser.map(lambda x:x.title())

0     How
1      To
2    Kick
3    Ass?
dtype: object

In [202]:
#2nd Method is

In [204]:
ser.str.capitalize()

0     How
1      To
2    Kick
3    Ass?
dtype: object

# Ex 19: How to calculate the number of characters in each word in a series?
Q: Get the number of characters in each word in a series

In [205]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

In [218]:
ser.map(lambda x:len(x))

0    3
1    2
2    4
3    4
dtype: int64

# Ex 20: How to compute the difference of differences between consecutive numbers of a series?
Q: Find the difference of differences between the consecutive numbers of ser.

In [225]:
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])

In [228]:
ser.diff().tolist()

[nan, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 8.0]

In [233]:
ser.diff().diff().tolist()

[nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]

# Ex 21: How to convert a series of date-strings to a timeseries?
Q: How to convert a series of date-strings to a timeseries?

In [234]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

In [235]:
ser

0         01 Jan 2010
1          02-02-2011
2            20120303
3          2013/04/04
4          2014-05-05
5    2015-06-06T12:20
dtype: object

In [241]:
pd.to_datetime(ser)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

# Ex 22: How to get the day of the month, week number, day of year and day of the week from a series of date strings?
Q: Get the day of the month, week number, day of year and day of the week from ser.

In [242]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

In [244]:
ser_dt=pd.to_datetime(ser)

In [246]:
date=list(ser_dt.dt.day)

In [247]:
date

[1, 2, 3, 4, 5, 6]

In [248]:
month=list(ser_dt.dt.month)

In [249]:
month

[1, 2, 3, 4, 5, 6]

In [259]:
week=list(ser_dt.dt.day_name())

In [260]:
week

['Friday', 'Wednesday', 'Saturday', 'Thursday', 'Monday', 'Saturday']

In [255]:
year=list(ser_dt.dt.dayofyear)

In [256]:
year

[1, 33, 63, 94, 125, 157]

# Ex 23: How to convert year-month string to dates corresponding to the 4th day of the month?
Q: Change ser to dates that start with 4th of the respective months.

In [273]:
input = ['2010/01/04','2011/02/04','2012/03/04']

In [272]:
format = '%Y/%m/%d'

In [262]:
# ouput look like
# 0   2010-01-04
# 1   2011-02-04
# 2   2012-03-04
# dtype: datetime64[ns]

# Ex 24: How to filter words that contain atleast 2 vowels from a series?
Q: From ser, extract words that contain atleast 2 vowels.

In [263]:
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Ex 25: How to filter valid emails from a series?
Extract the valid emails from the series emails. The regex pattern for valid emails is provided as

In [264]:
emails = pd.Series(['buying books at amazom.com', 'rameses@egypt.com', 'matt@t.co', 'narendra@modi.com','dad@comp'])
pattern ='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}'

In [270]:
emails[emails.str.match(pat=pattern)]

1    rameses@egypt.com
2            matt@t.co
3    narendra@modi.com
dtype: object

In [None]:
#Student of DataScience
#Student of DataTrained-Saurav
#Date-28-07-2022
#Time-11:46