<a href="https://colab.research.google.com/github/sugatoray/CodeSnippets/blob/master/Code_Snippets_Map_Dict_List.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold # K-Fold validation

# PCA related
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import os, time, json

# progressbar
from tqdm import tqdm, tqdm_notebook, tnrange

%matplotlib inline
%config InlineBackend.figure_format = 'svg' # 'svg', 'retina'
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name is installed.
plt.style.use('seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available
              else 'seaborn-white')

# Map

In [2]:
def pwr(x, n=2):
    """Return ``x`` raised to the power ``n`` (squares by default)."""
    return pow(x, n)

# Square each of the integers 1..5 by mapping the helper over them.
list(map(lambda value: pwr(value, n=2), range(1, 6)))

[1, 4, 9, 16, 25]

# Dict

## Make a dict from two lists

In [3]:
# Pair up two equal-length sequences: keys 0..7 and values 50, 100, ..., 400.
keys = np.arange(0, 8)
values = 50 * np.arange(1, 9)
dict(zip(keys, values))

{0: 50, 1: 100, 2: 150, 3: 200, 4: 250, 5: 300, 6: 350, 7: 400}

# Pandas

## DataFrame Methods

In [4]:
# Single-column frame (column label 0) holding the squares of 0..99.
x = np.square(np.arange(100))
df = pd.DataFrame(data=x)

### DataFrame.head()

`df.head()` shows the first 5 rows of the dataframe. To see the first 10 rows instead, pass the number of rows:  
```python
df.head(10)
```

### DataFrame.describe()

This generates descriptive statistics (count, mean, standard deviation, min, quartiles and max) for each numeric column of the dataframe.

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame's columns.
df.describe()

Unnamed: 0,0
count,100.0
mean,3283.5
std,2968.174804
min,0.0
25%,612.75
50%,2450.5
75%,5513.25
max,9801.0


### DataFrame.stack() and .unstack()

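These methods reshape a dataframe: `stack()` moves the column labels into the innermost level of the row index, and `unstack()` moves the innermost row-index level back into the columns.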

#### `stack()`

#### `unstack()`

#### `crosstab()`

Create dataframe for crosstab.

In [6]:
def _create_df():
    """
    This dataframe will be used to create a crosstab
    """
    B = np.array(list('abracadabra'))
    A = np.arange(len(B))
    AB = list()
    for i in range(20):
        a = np.random.randint(1,10)
        b = np.random.randint(1,10)
        AB += [(a,b)]
    AB = np.unique(np.array(AB), axis=0)
    AB = np.unique(np.array(list(zip(A[AB[:,0]], B[AB[:,1]]))), axis=0)
    AB_df = pd.DataFrame({'ID': AB[:,0], 'Label': AB[:,1]})
    return AB_df

AB_df = _create_df()
AB_df

Unnamed: 0,ID,Label
0,1,a
1,1,c
2,2,a
3,2,b
4,2,r
5,3,a
6,3,b
7,4,a
8,4,r
9,5,r


Make a crosstab.

In [7]:
# Count how often each (ID, Label) pair occurs: one row per ID, one column per Label.
AB_crosstab = pd.crosstab(AB_df['ID'], AB_df['Label'])
AB_crosstab

Label,a,b,c,d,r
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,0,1,0,0
2,1,1,0,0,1
3,1,1,0,0,0
4,1,0,0,0,1
5,0,0,0,0,1
6,0,1,0,1,0
8,1,0,1,1,1
9,1,0,1,0,1


##### Inversion of Crosstab: Retrieve Original DataFrame

Retrieve original dataframe from the crosstab.

In [8]:
def invert_crosstab(crosstab_df, asDataFrame = True):
    """
    Recover the (row label, column label) pairs encoded by a crosstab.

    Every non-zero cell of ``crosstab_df`` contributes one pair, emitted row by
    row and, within a row, in column order.  Cell values are only tested for
    being non-zero, so a count above one still yields a single pair.

    Parameters
    ----------
    crosstab_df : pandas.DataFrame
        Table such as the result of ``pd.crosstab``; the names of its index
        and columns become the column names of the returned frame.
    asDataFrame : bool, default True
        Return a DataFrame when true, otherwise a 2-D NumPy array of pairs.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        One row per pair.  The DataFrame keeps each label's own type; the
        array form stores both labels in a single common dtype.  A crosstab
        without any non-zero cell gives an empty, two-column result.
    """
    row_labels = crosstab_df.index
    column_labels = crosstab_df.columns
    pairs = [
        (row_label, column_label)
        for row_label, row_values in zip(row_labels, crosstab_df.to_numpy())
        for column_label in column_labels[row_values.astype(bool)]
    ]

    if asDataFrame:
        # Build from the tuples directly: routing them through one NumPy array
        # would coerce mixed labels (e.g. int IDs, str labels) to strings.
        return pd.DataFrame(pairs, columns = [row_labels.name, column_labels.name])
    if not pairs:
        # np.array([]) would be 1-D; keep the (n_pairs, 2) shape for no pairs.
        return np.empty((0, 2), dtype=object)
    return np.array(pairs)
    
# Rebuild the long-format (ID, Label) table from the crosstab.
accumulator_df = invert_crosstab(AB_crosstab, asDataFrame=True)
accumulator_df

Unnamed: 0,ID,Label
0,1,a
1,1,c
2,2,a
3,2,b
4,2,r
5,3,a
6,3,b
7,4,a
8,4,r
9,5,r


Check that the **retrieved** dataframe equals the **original**.

In [9]:
# Use np.all rather than the np.alltrue alias, which NumPy 2.0 removed.
np.all(AB_df == accumulator_df)

True

# Other Stuff