### PANDAS

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Show the installed NumPy version (useful when reproducing the outputs below).
np.__version__

'1.25.2'

### How to create a series from a list, numpy array and dict?

In [4]:
# Prepare the same 26 items in three container types: a list, a NumPy array and a dict.
# NOTE(review): the literal below has 'e' before 'd' ("abced..."), so the letters are not in
# strict alphabetical order. It is kept as-is because every recorded output depends on it.
mylist = [*'abcedfghijklmnopqrstuvwxyz']   # one single-character string per list element
myarr = np.arange(len(mylist))             # integers 0..25, one per letter
# Pair each letter with the integer at the same position (letter -> number).
mydict = {letter: number for letter, number in zip(mylist, myarr)}

In [5]:
# Show the three inputs, one per line: the list, the array and the dict.
print(mylist, myarr, mydict, sep='\n')

['a', 'b', 'c', 'e', 'd', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
{'a': 0, 'b': 1, 'c': 2, 'e': 3, 'd': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25}


#### A pandas Series is a one-dimensional labelled array, comparable to a single column of data

In [6]:
# From a list: each element becomes a value, labelled by a default integer index (0, 1, 2, ...).
ser1 = pd.Series(data=mylist)
# From a NumPy array: same idea, one default integer label per array element.
ser2 = pd.Series(data=myarr)
# From a dict: the keys become the index labels and the dict values become the Series values.
ser3 = pd.Series(data=mydict)

In [7]:
# Series built from the list: letters as values, default integer index on the left.
print(ser1)

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object


In [8]:
# Series built from the NumPy array: integers as values, default integer index on the left.
print(ser2)

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
dtype: int64


In [9]:
# Series built from the dict: the dict keys (letters) are the index, the dict values are the data.
print(ser3)

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64


### How to convert the index of a series into a column of a dataframe?

In [10]:
# Work on an independent copy so that `ser3` itself is left untouched.
ser = ser3.copy(deep=True)  # deep=True is the default; spelled out here for clarity
ser

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64

In [11]:
# Step 1 - to_frame(): wraps the one-dimensional Series in a two-dimensional, single-column DataFrame.
#          The letters are still the row index at this point; the values column is labelled 0
#          because the Series has no name.
# Step 2 - reset_index(): moves the existing index (the letters) into a regular column called 'index'
#          and replaces the row index with the default integers 0..n-1.
df= ser.to_frame().reset_index()

In [12]:
print(df.head())    # head() returns the first 5 rows of the DataFrame by default.
                    # Pass an integer to choose how many rows to show, e.g. df.head(10) for the first 10.

  index  0
0     a  0
1     b  1
2     c  2
3     e  3
4     d  4


### How to combine many series to form a dataframe?

In [13]:
# Two Series of equal length that share the default integer index 0..25.
# The letter literal is the same one used earlier in the notebook (note: 'e' precedes 'd').
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])  # letters
ser2 = pd.Series(np.arange(0, 26))                 # integers 0..25

In [14]:
# Each keyword becomes a column label and each Series supplies that column's values;
# rows are matched up by the index labels the Series have in common.
df1 = pd.DataFrame(dict(col1=ser1, col2=ser2))
df1.head()

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


### How to assign a name to a series?

In [15]:
# A fresh, unnamed Series of letters (same literal as earlier in the notebook; 'e' precedes 'd').
ser = pd.Series(data=[*'abcedfghijklmnopqrstuvwxyz'])
ser.head()

0    a
1    b
2    c
3    e
4    d
dtype: object

In [16]:
ser.name= 'alphabet'    # .name labels the Series itself (its values), not its index; it is shown as "Name: alphabet" in the output.
ser.head()              # The name becomes the column label when the Series is placed in a DataFrame (e.g. via to_frame()).
                        # To label the index instead, set ser.index.name.

0    a
1    b
2    c
3    e
4    d
Name: alphabet, dtype: object

### How to get the items of series A not present in series B?

In [20]:
# Two small integer Series that overlap only on the values 4 and 5.
A = pd.Series(range(1, 6))  # 1, 2, 3, 4, 5
B = pd.Series(range(4, 9))  # 4, 5, 6, 7, 8

In [21]:
# A.isin(B) is a Series method that returns a boolean mask: True where A's value also occurs in B.
# `~` inverts that mask, so the selection keeps only the values of A that are NOT present in B.
A.loc[~A.isin(B)]

0    1
1    2
2    3
dtype: int64