##### https://pandas.pydata.org/docs/user_guide/index.html

# 1. <a href="https://pandas.pydata.org/docs/getting_started/10min.html#min">10min to panda</a>
2. <a href="https://www.machinelearningplus.com/python/101-pandas-exercises-python/">Questions</a>

##### Pandas DataStructures
1. <a href="https://pandas.pydata.org/docs/getting_started/dsintro.html#series">Series</a>
2. <a href="https://pandas.pydata.org/docs/getting_started/dsintro.html#dataframe">DataFrame</a>

In [3]:
!pip3 install pandas

Collecting pandas
  Downloading https://files.pythonhosted.org/packages/d9/02/efd55383399646d0bc3bf0078130ae08f2890dd68276e3f4d7a4e94539a4/pandas-1.0.1-cp38-cp38-win32.whl (7.8MB)
Collecting pytz>=2017.2 (from pandas)
  Using cached https://files.pythonhosted.org/packages/e7/f9/f0b53f88060247251bf481fa6ea62cd0d25bf1b11a87888e53ce5b7c8ad2/pytz-2019.3-py2.py3-none-any.whl
Installing collected packages: pytz, pandas
Successfully installed pandas-1.0.1 pytz-2019.3


You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [3]:
#1 How to import pandas and check the version?
import numpy as np
import pandas as pd
print(pd.__version__)

1.0.1


In [3]:
#2 Create a pandas series from each of the items below: a list, numpy and a dictionary
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
mylist_s = pd.Series(mylist)
print(mylist_s)
myarr_s = pd.Series(myarr)
print(myarr_s)
mydict_s = pd.Series(mydict)
print(mydict_s)

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object
0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
dtype: int32
a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64


In [9]:
#3 Convert the series ser into a dataframe with its index as another column on the dataframe.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)
df = ser.to_frame().reset_index()
print(df)

   index   0
0      a   0
1      b   1
2      c   2
3      e   3
4      d   4
5      f   5
6      g   6
7      h   7
8      i   8
9      j   9
10     k  10
11     l  11
12     m  12
13     n  13
14     o  14
15     p  15
16     q  16
17     r  17
18     s  18
19     t  19
20     u  20
21     v  21
22     w  22
23     x  23
24     y  24
25     z  25


In [12]:
#4 Combine ser1 and ser2 to form a dataframe
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))
df = pd.DataFrame({'ser1':ser1,'ser2':ser2})
'''
Another way:
df = pd.concat([ser1, ser2], axis=1)
'''
print(df)

   ser1  ser2
0     a     0
1     b     1
2     c     2
3     e     3
4     d     4
5     f     5
6     g     6
7     h     7
8     i     8
9     j     9
10    k    10
11    l    11
12    m    12
13    n    13
14    o    14
15    p    15
16    q    16
17    r    17
18    s    18
19    t    19
20    u    20
21    v    21
22    w    22
23    x    23
24    y    24
25    z    25


In [15]:
#5 Give a name to the series ser calling it ‘alphabets’.
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'),name='alphabets')
'''
Another way:
ser.name="alphabetsList"
'''
print(ser)

0     a
1     b
2     c
3     d
4     e
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: alphabets, dtype: object


In [21]:
#6 From ser1 remove items present in ser2
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
new_ser1 = ser1[~ser1.isin(ser2)] 
print(new_ser1)

0    1
1    2
2    3
dtype: int64


In [25]:
#7 Get all items of ser1 and ser2 not common to both.
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
not_common=pd.concat([ser1[~ser1.isin(ser2)],ser2[~ser2.isin(ser1)]])
'''
Using set operations in numpy.
ser_u = pd.Series(np.union1d(ser1, ser2))  # union
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # intersect
ser_u[~ser_u.isin(ser_i)]
'''
print(not_common)

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64


In [33]:
#8 Compute the minimum, 25th percentile, median, 75th, and maximum of ser.
state=np.random.RandomState(100)
ser = pd.Series(state.normal(10, 5, 25))
percentile = np.percentile(ser,q=[0,25,50,75,100])
print(percentile)

[ 1.25117263  7.70986507 10.92259345 13.36360403 18.0949083 ]


In [35]:
#9 Calculte the frequency counts of each unique value ser.
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
counts = ser.value_counts()
print(counts)

e    7
f    6
h    4
a    3
c    3
b    3
g    2
d    2
dtype: int64


In [49]:
#10 From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
counts = ser.value_counts()
print(counts)
print(ser)
print(ser[~ser.isin(counts[:2].index)])
ser[~ser.isin(counts[:2].index)]=-1
print(ser)

3    4
2    4
1    3
4    1
dtype: int64
0     4
1     3
2     2
3     1
4     3
5     1
6     3
7     1
8     2
9     2
10    2
11    3
dtype: int32
0    4
3    1
5    1
7    1
dtype: int32
0    -1
1     3
2     2
3    -1
4     3
5    -1
6     3
7    -1
8     2
9     2
10    2
11    3
dtype: int32


In [1]:
#11 Bin the series ser into 10 equal deciles and replace the values with the bin name.
# -- Need to look

In [4]:
#12 Reshape the series ser into a dataframe with 7 rows and 5 columns
ser = pd.Series(np.random.randint(1, 10, 35))
df = pd.DataFrame(ser.values.reshape(7,5))
print(df)

   0  1  2  3  4
0  7  9  9  5  6
1  1  6  7  7  2
2  9  2  6  5  6
3  9  7  6  9  7
4  8  7  5  5  2
5  6  7  5  3  4
6  3  7  6  2  5


In [14]:
#13 How to find the positions of numbers that are multiples of 3 from a series?
ser = pd.Series(np.random.randint(1, 10, 7))
print(ser)
positions = np.argwhere(ser.values % 3 == 0)
print(positions)

0    8
1    2
2    6
3    9
4    7
5    5
6    3
dtype: int32
[[2]
 [3]
 [6]]


In [15]:
#14 From ser, extract the items at positions in list pos.
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]
elems = ser[pos]
print(elems)

0     a
4     e
8     i
14    o
20    u
dtype: object


In [18]:
#15 Stack ser1 and ser2 vertically and horizontally (to form a dataframe)
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))
vertical = ser1.append(ser2)
print(vertical)
horizontal = pd.concat([ser1,ser2],axis=1)
print(horizontal)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object
   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


In [25]:
#16 Get the positions of items of ser2 in ser1 as a list.
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])
print(np.where(ser1.isin(ser2)))

(array([0, 4, 5, 8], dtype=int32),)
