In [1]:
import pandas as pd
import numpy as np

### Numpy Array Operations

#### stacking of 2 arrays 

In [11]:
# Two 2x2 inputs; np.stack joins them along a NEW axis, so every result below
# is 3-D with shape (2, 2, 2).
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

print('First Array:')
print(a, '\n')

print('Second Array:')
print(b, '\n')

# axis=0: the new axis comes first, so result[0] is a and result[1] is b.
print('Stack the two arrays along axis 0:')
print(np.stack([a, b], axis=0), '\n')

# axis=1: the new axis is inserted second, pairing up row i of a with row i of b.
print('Stack the two arrays along axis 1:')
print(np.stack([a, b], axis=1))

# With no axis given, np.stack uses axis=0.
print('Default stacking:')
print(np.stack([a, b]))

First Array:
[[1 2]
 [3 4]] 

Second Array:
[[5 6]
 [7 8]] 

Stack the two arrays along axis 0:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]] 

Stack the two arrays along axis 1:
[[[1 2]
  [5 6]]

 [[3 4]
  [7 8]]]
Default stacking:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [36]:
# list stacking
# lst1 is rectangular (three rows of two items), so np.stack can turn it into
# a 3x2 array by treating each inner list as one row.
lst1 = [
    [1, 2],
    [3, 4],
    [5, 6],
]
# lst2 has rows of different lengths, so it does not describe a rectangular array.
lst2 = [
    [7, 8],
    [9, 10],
    [11],
    [12, 13, 14],
]
np.stack(lst1, axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [37]:
# `+` concatenates: the result is a NEW flat list holding the rows of lst1
# followed by the rows of lst2; neither operand is modified.
lst1 + lst2

[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11], [12, 13, 14]]

In [38]:
# append() adds its argument as ONE element, so the whole of lst2 ends up
# nested as the last item (contrast with `+`, which concatenates).
# The append is done on a shallow copy so that lst1 itself is left untouched:
# re-running this cell then always shows the same result instead of nesting
# lst2 one more time on every execution.
lst1_appended = list(lst1)
lst1_appended.append(lst2)
lst1_appended

[[1, 2], [3, 4], [5, 6], [[7, 8], [9, 10], [11], [12, 13, 14]]]

#### Array Reshaping

In [43]:
# Reshape From 1-D to 2-D
# Twelve values 1..12 laid out as 4 rows of 3; reshape fills row by row.
arr = np.arange(1, 13)
newarr = np.reshape(arr, (4, 3))
print(newarr)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [45]:
# Reshape From 1-D to 3-D
# The same twelve values as 2 blocks, each of 3 rows by 2 columns (2*3*2 == 12).
arr = np.arange(1, 13)
newarr = np.reshape(arr, (2, 3, 2))
print(newarr)

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]


In [47]:
# Start from the flat sequence 0..7; each step below re-creates it and
# reshapes it, so the steps are independent of one another.
array = np.arange(8)
print("Original array : \n", array)

# shape array with 2 rows and 4 columns
array = np.arange(8).reshape(2, 4)
print("\narray reshaped with 2 rows and 4 columns : \n", array)

# shape array with 4 rows and 2 columns
# (the printed label previously repeated "2 rows and 4 columns" by mistake)
array = np.arange(8).reshape(4, 2)
print("\narray reshaped with 4 rows and 2 columns : \n", array)

# Constructs 3D array
array = np.arange(8).reshape(2, 2, 2)
print("\nOriginal array reshaped to 3D : \n", array)

Original array : 
 [0 1 2 3 4 5 6 7]

array reshaped with 2 rows and 4 columns : 
 [[0 1 2 3]
 [4 5 6 7]]

array reshaped with 2 rows and 4 columns : 
 [[0 1]
 [2 3]
 [4 5]
 [6 7]]

Original array reshaped to 3D : 
 [[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


#### Repeat a sequence (np.repeat and np.tile)

- `np.repeat`: repeats each individual element
- `np.tile`: repeats the entire sequence

In [54]:
# Every value 0..34 is emitted three times in a row: 0, 0, 0, 1, 1, 1, ...
np.repeat(np.arange(35), 3)

array([ 0,  0,  0,  1,  1,  1,  2,  2,  2,  3,  3,  3,  4,  4,  4,  5,  5,
        5,  6,  6,  6,  7,  7,  7,  8,  8,  8,  9,  9,  9, 10, 10, 10, 11,
       11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16,
       17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 21, 21, 22, 22,
       22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28,
       28, 28, 29, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33,
       34, 34, 34])

In [72]:
an_array = np.array([1, 2, 3])

# A scalar reps lays the whole sequence end to end: the result stays 1-D.
repeats_array = np.tile(an_array, reps=3)

# reps=(5, 1) repeats the sequence 5 times down the rows and once across,
# giving a 5x3 array whose every row is an_array.
repeats_array_1 = np.tile(an_array, reps=(5, 1))

print(
    'array:', an_array,
    '\n repeated array:', repeats_array,
    '\n repeated array:\n', repeats_array_1,
)

array: [1 2 3] 
 repeated array: [1 2 3 1 2 3 1 2 3] 
 repeated array:
 [[1 2 3]
 [1 2 3]
 [1 2 3]
 [1 2 3]
 [1 2 3]]


### Pandas

#### melt:  wide to long

`melt` changes a DataFrame from wide to long format: the names of the melted columns are stored in a `variable` column and their values in a `value` column.

In [75]:
# Small wide-format table: one row per person, one column per attribute.
d1 = {
    "Name": ["Pankaj", "Lisa", "David"],
    "ID": [1, 2, 3],
    "Role": ["CEO", "Editor", "Author"],
    "Salary": [50000, 20000, 30000],
}
df = pd.DataFrame(data=d1)
print(df, '\n')

# Keep ID as the identifier and unpivot the remaining three columns: each
# (ID, column) pair becomes one row, with the column name in 'variable' and
# the cell content in 'value'.
df_melted = df.melt(id_vars=["ID"], value_vars=["Name", "Role", "Salary"])
print(df_melted)

     Name  ID    Role  Salary
0  Pankaj   1     CEO   50000
1    Lisa   2  Editor   20000
2   David   3  Author   30000 

   ID variable   value
0   1     Name  Pankaj
1   2     Name    Lisa
2   3     Name   David
3   1     Role     CEO
4   2     Role  Editor
5   3     Role  Author
6   1   Salary   50000
7   2   Salary   20000
8   3   Salary   30000


In [79]:
# Multiple Columns as id_vars
df_melted_1 = pd.melt(df, id_vars=["ID","Name"], value_vars=["Role","Salary"], var_name="Attribute", value_name="Value")
df_melted_1

Unnamed: 0,ID,Name,Attribute,Value
0,1,Pankaj,Role,CEO
1,2,Lisa,Role,Editor
2,3,David,Role,Author
3,1,Pankaj,Salary,50000
4,2,Lisa,Salary,20000
5,3,David,Salary,30000


#### Pivot: Unmelting DataFrame using pivot() function

Use the `pivot()` function to unmelt a DataFrame and recover the original layout. The `index` argument should be the same column(s) that were passed as `id_vars` to `melt`, and the `columns` argument should be the name of the `variable` column.

In [93]:
# Spread the entries of 'variable' back out into columns, one row per ID.
# No `values` argument is given, so the remaining column ('value') becomes the
# top level of a two-level column index.
df_unmelted = df_melted.pivot(columns='variable', index='ID')
print(df_unmelted, '\n')

# reset_index() turns ID back into an ordinary column; the two-level column
# header is still present at this point.
print(df_unmelted.reset_index())

           value               
variable    Name    Role Salary
ID                             
1         Pankaj     CEO  50000
2           Lisa  Editor  20000
3          David  Author  30000 

         ID   value               
variable       Name    Role Salary
0         1  Pankaj     CEO  50000
1         2    Lisa  Editor  20000
2         3   David  Author  30000


In [94]:
# Flatten the pivoted frame back to the original wide layout.
# The result is derived from df_melted rather than from the previous value of
# df_unmelted: overwriting df_unmelted with an expression that reads
# df_unmelted['value'] only works once, because the flattened frame no longer
# has a 'value' column and a second run of the cell raises KeyError.
df_unmelted = (
    df_melted
    .pivot(index='ID', columns='variable')['value']  # drop the 'value' column level
    .reset_index()                                   # ID becomes a regular column again
)
# Remove the leftover 'variable' label on the column index so the header
# prints like a plain DataFrame.
df_unmelted.columns.name = None
print(df_unmelted)

   ID    Name    Role Salary
0   1  Pankaj     CEO  50000
1   2    Lisa  Editor  20000
2   3   David  Author  30000
