In [2]:
import numpy as np 
import pandas as pd

In [3]:
# Ex1: How to create a series from a list, numpy array and dict?

# Three different source containers.
a_list = [letter for letter in "abcdefg"]
np_arr = np.arange(start=1, stop=10)
# Map each label to its position in the label sequence.
dic = {label: position for position, label in enumerate("ABCDE")}

# pd.Series accepts each container directly; for the dict, the keys
# become the index labels and the values become the data.
series_lst, series_arr, series_dic = (
    pd.Series(a_list),
    pd.Series(np_arr),
    pd.Series(dic),
)

In [4]:
#Ex2: How to combine many series to form a dataframe?
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

# Build the frame column-wise from both series.
# The second positional argument of pd.DataFrame is `index=`, so
# pd.DataFrame(ser1, ser2) would reindex ser1 by the *values* of ser2 and
# only works when those values happen to equal ser1's own index labels
# (any other values would produce NaN rows). A dict of columns aligns the
# two series on their shared default index instead.
# The column labels "index" and 0 are kept so the resulting frame has the
# same layout as before.
df = pd.DataFrame({"index": ser2, 0: ser1})
df

Unnamed: 0,index,0
0,0,a
1,1,b
2,2,c
3,3,e
4,4,d
5,5,f
6,6,g
7,7,h
8,8,i
9,9,j


In [6]:
# Ex3: How to get the items of series A not present in series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Flag every entry of ser1 whose value also occurs in ser2, then keep
# only the unflagged entries (their original index labels are preserved).
found_in_ser2 = ser1.isin(ser2)
result = ser1.loc[~found_in_ser2]
result

0    1
1    2
2    3
dtype: int64

In [19]:
# Ex4: How to get the items not common to both series A and series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Values shared by both series, and all distinct values of either series.
inter = pd.Series(np.intersect1d(ser1, ser2))
uni = pd.Series(np.union1d(ser1, ser2))

# Drop the shared values from the union; what remains belongs to
# exactly one of the two series.
shared = uni.isin(inter)
uni.loc[~shared]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [23]:
# Ex5: How to get summary statistics of a numeric series?

# Seeded generator so the 25 draws are the same on every run.
state = np.random.RandomState(100)
samples = state.normal(10, 5, 25)  # loc=10, scale=5, 25 values
ser = pd.Series(samples)

# count, mean, std, min, quartiles and max in one call.
ser.describe()

count    25.000000
mean     10.435437
std       4.253118
min       1.251173
25%       7.709865
50%      10.922593
75%      13.363604
max      18.094908
dtype: float64

In [27]:
# Ex6: How to get frequency counts of unique items of a series?

# 30 random positions into the 8-letter alphabet (unseeded draw).
picks = np.random.randint(8, size=30)
ser = pd.Series(np.take(list('abcdefgh'), picks))

# Occurrences per distinct letter, most frequent first.
ser.value_counts()

h    7
g    6
e    5
b    4
c    2
a    2
f    2
d    2
Name: count, dtype: int64

In [31]:
# Ex7: How to convert a numpy array to a dataframe of given shape? (L1)

# 35 random integers in [1, 10) (unseeded draw).
ser = pd.Series(np.random.randint(low=1, high=10, size=35))
arr = np.array(ser)

# 35 values laid out as 7 rows x 5 columns.
pd.DataFrame(arr.reshape((7, 5)))

Unnamed: 0,0,1,2,3,4
0,2,6,2,2,2
1,3,6,7,5,5
2,7,9,5,9,7
3,8,3,5,9,2
4,3,6,2,9,2
5,4,9,9,9,3
6,6,7,9,3,6


In [38]:
#Ex8: How to find the positions of numbers that are multiples of 3 from a series?

# Draw from the seeded generator itself. Constructing RandomState(100)
# without keeping the instance does not seed np.random's global generator,
# so np.random.randint(...) would return different numbers on every run.
state = np.random.RandomState(100)
ser = pd.Series(state.randint(1, 5, 10))  # 10 integers in [1, 5)

# The index labels of the filtered series are the positions of the
# multiples of 3 (the series uses the default 0..n-1 index).
ser[ser % 3 == 0]

0    3
7    3
8    3
9    3
dtype: int64

In [39]:
# Ex9: How to extract items at given positions from a series?

# Integer positions to pull out (these land on the vowels).
pos = [0, 4, 8, 14, 20]
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))

# Purely positional lookup; the selected rows keep their index labels.
ser.iloc[pos]

0     a
4     e
8     i
14    o
20    u
dtype: object

In [42]:
#10: How to stack two series vertically and horizontally?
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Vertically: ser2 is appended below ser1. Each series keeps its own
# index labels, so the labels 0-4 appear twice in the result.
# The result is bound to a name so it is not silently discarded: only
# the last expression of a cell is displayed.
stacked_vertically = pd.concat([ser1, ser2], axis=0)

# Horizontally: the series become columns 0 and 1, aligned on the index.
stacked_horizontally = pd.concat([ser1, ser2], axis=1)
stacked_horizontally

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


In [46]:
# Ex11: How to get the positions of items of series A in another series B?
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# Flag the entries of ser1 whose value occurs in ser2, then read off the
# index labels of the flagged entries (reported in ser1 order).
is_member = ser1.isin(ser2)
ser1.loc[is_member].index

Index([0, 4, 5, 8], dtype='int64')

In [56]:
#12. How to compute difference of differences between consecutive numbers of a series?
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])

# Desired Output
# [nan, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 8.0]
# [nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]

# First-order differences; the first entry has no predecessor, hence NaN.
first_order = ser.diff()
print(first_order.tolist())
# Differencing the differences yields the second-order differences.
print(first_order.diff().tolist())

[nan, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 8.0]
[nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]


In [59]:
#13. How to convert a series of date-strings to a timeseries?

# Six dates, each written in a different textual style.
ser = pd.Series([
    '01 Jan 2010',
    '02-02-2011',
    '20120303',
    '2013/04/04',
    '2014-05-05',
    '2015-06-06T12:20',
])

# Desired Output
#
# 0   2010-01-01 00:00:00
# 1   2011-02-02 00:00:00
# 2   2012-03-03 00:00:00
# 3   2013-04-04 00:00:00
# 4   2014-05-05 00:00:00
# 5   2015-06-06 12:20:00

# format='mixed' lets every element be parsed with its own format
# instead of requiring one format for the whole series.
pd.to_datetime(ser, format='mixed')

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

In [71]:
#14. How to filter words that contain at least 2 vowels from a series?
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Desired Output
#
# 0     Apple
# 1    Orange
# 4     Money
# dtype: object

vowels = ['a', 'e', 'o', 'u', 'i']


def count_vow(w):
    """Return True when `w` contains two or more vowels (case-insensitive).

    Every occurrence counts, so a repeated vowel is counted each time.
    """
    n_vowels = sum(1 for ch in w.lower() if ch in vowels)
    return n_vowels >= 2


# Boolean mask over the words, used to keep only the matching ones.
ser[ser.apply(count_vow)]

0     Apple
1    Orange
4     Money
dtype: object

In [79]:
#15. How to replace missing spaces in a string with the least frequent character?
from collections import Counter

my_str = 'dbc deb abed ggade'

# Desired Output
# 'dbccdebcabedcggade'  # least frequent is 'c'

# Tally every character except the spaces (case-insensitively).
without_spaces = my_str.lower().replace(" ", "")
counter = Counter(without_spaces)

# Character with the smallest tally; on a tie the one seen first wins.
least_freq_c = min(counter, key=lambda ch: counter[ch])

# Fill each space with that character.
my_str.replace(" ", least_freq_c)

'dbccdebcabedcggade'

In [None]:
#16. How to change column values when importing csv to a dataframe?
# NOTE(review): the code visible in this cell only lists files; it does not
# read a CSV or change any column values yet - confirm whether the solution
# for this exercise is still to be written.
import os
# Walk the '/kaggle/input' tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Combine the directory being visited with the file name.
        print(os.path.join(dirname, filename))