In [3]:
import pandas as pd
import numpy as np

data1={
    'ID':[1,2,3,4],
    'Name':['Abhishek','Lavanaya','Prapti','Harry'],
    'Age':[25,30,35,40]
}

data2={
    'ID':[3,4,5,6],
    'Gender':['F','M','M','F'],
    'Salary':[70000,80000,50000,60000]
}

In [4]:
df1=pd.DataFrame(data1)
df2=pd.DataFrame(data2)

In [6]:
tight_coupling=pd.merge(df1,df2,on='ID',how='inner')
print("Tight Coupling Result :\n",tight_coupling)

Tight Coupling Result :
    ID    Name  Age Gender  Salary
0   3  Prapti   35      F   70000
1   4   Harry   40      M   80000


In [7]:
loose_coupling=pd.concat([df1.set_index('ID'),df2.set_index('ID')],axis=1).reset_index()
print("Loose Coupling Result: \n",loose_coupling)

Loose Coupling Result: 
    ID      Name   Age Gender   Salary
0   1  Abhishek  25.0    NaN      NaN
1   2  Lavanaya  30.0    NaN      NaN
2   3    Prapti  35.0      F  70000.0
3   4     Harry  40.0      M  80000.0
4   5       NaN   NaN      M  50000.0
5   6       NaN   NaN      F  60000.0


In [8]:
loose_coupling['Smoothed_Age']=loose_coupling['Age'].rolling(window=2,min_periods=1).mean()
print("Smoothing: \n",loose_coupling[['ID','Age','Smoothed_Age']])

Smoothing: 
    ID   Age  Smoothed_Age
0   1  25.0          25.0
1   2  30.0          27.5
2   3  35.0          32.5
3   4  40.0          37.5
4   5   NaN          40.0
5   6   NaN           NaN


In [10]:
aggregation=loose_coupling.groupby('Gender')['Salary'].sum().reset_index()
print("Aggregration:\n",aggregation)

Aggregration:
   Gender    Salary
0      F  130000.0
1      M  130000.0


In [11]:
bins = [0, 20, 30, 40, 50]
labels = ['Teen', 'Young Adult', 'Adult', 'Senior']
loose_coupling['Age_Group'] = pd.cut(loose_coupling['Age'], bins=bins, labels=labels)
print("\nDiscretization:\n", loose_coupling[['ID', 'Age', 'Age_Group']])



Discretization:
    ID   Age    Age_Group
0   1  25.0  Young Adult
1   2  30.0  Young Adult
2   3  35.0        Adult
3   4  40.0        Adult
4   5   NaN          NaN
5   6   NaN          NaN


In [12]:
loose_coupling['Age_Salary_Ratio']=loose_coupling['Age']/loose_coupling['Salary']
print("Attribute Construction: \n",loose_coupling[['ID','Age','Salary','Age_Salary_Ratio']])

Attribute Construction: 
    ID   Age   Salary  Age_Salary_Ratio
0   1  25.0      NaN               NaN
1   2  30.0      NaN               NaN
2   3  35.0  70000.0            0.0005
3   4  40.0  80000.0            0.0005
4   5   NaN  50000.0               NaN
5   6   NaN  60000.0               NaN


In [13]:
salary_bins=[0,60000,80000,10000]
salary_labels=['Low','Medium','High']
loose_coupling['Salary_Range']=pd.cut(loose_coupling['Salary'],bins=salary_bins,labels=salary_labels)
print("\nGeneralization:\n", loose_coupling[['ID', 'Salary', 'Salary_Range']])

ValueError: bins must increase monotonically.

In [14]:
salary_bins = [0, 60000, 80000, 100000]
salary_labels = ['Low', 'Medium', 'High']
loose_coupling['Salary_Range'] = pd.cut(loose_coupling['Salary'], bins=salary_bins, labels=salary_labels)
print("\nGeneralization:\n", loose_coupling[['ID', 'Salary', 'Salary_Range']])


Generalization:
    ID   Salary Salary_Range
0   1      NaN          NaN
1   2      NaN          NaN
2   3  70000.0       Medium
3   4  80000.0       Medium
4   5  50000.0          Low
5   6  60000.0          Low


In [15]:
loose_coupling['Age_MinMax']=(loose_coupling['Age']-loose_coupling['Age'].min())
print("Min-Max normalization:\n",loose_coupling[['ID','Age','Age_MinMax']])

Min-Max normalization:
    ID   Age  Age_MinMax
0   1  25.0         0.0
1   2  30.0         5.0
2   3  35.0        10.0
3   4  40.0        15.0
4   5   NaN         NaN
5   6   NaN         NaN


In [16]:
### Z-Score Normalization
loose_coupling['Age_ZScore'] = (loose_coupling['Age'] - loose_coupling['Age'].mean()) / loose_coupling['Age'].std()
print("\nZ-Score Normalization:\n", loose_coupling[['ID', 'Age', 'Age_ZScore']])


Z-Score Normalization:
    ID   Age  Age_ZScore
0   1  25.0   -1.161895
1   2  30.0   -0.387298
2   3  35.0    0.387298
3   4  40.0    1.161895
4   5   NaN         NaN
5   6   NaN         NaN


In [19]:
scaling_factor=10**np.ceil(np.log10(loose_coupling['Age'].abs().max()))
loose_coupling['Age_DecimalScaling']=loose_coupling['Age']/scaling_factor
print("Decimal scaling:\n",loose_coupling[['ID','Age','Age_DecimalScaling']])

Decimal scaling:
    ID   Age  Age_DecimalScaling
0   1  25.0                0.25
1   2  30.0                0.30
2   3  35.0                0.35
3   4  40.0                0.40
4   5   NaN                 NaN
5   6   NaN                 NaN
