In [1]:
import pandas as pd

Camera = pd.read_csv("camera.csv")
Memory = pd.read_csv("memory.csv")
Screen = pd.read_csv("screen.csv")
TechSpec = pd.read_csv("techspec.csv")
Sales = pd.read_csv("sales.csv")

In [2]:
Camera.head()

Unnamed: 0,CameraID,pc,fc
0,10001,16,14
1,10002,12,4
2,10003,4,1
3,10004,20,18
4,10005,18,11


In [7]:
Camera.dtypes

CameraID    int64
pc          int64
fc          int64
dtype: object

In [4]:
Screen.head()

Unnamed: 0,ScreenID,sc_h,sc_w,px_width,px_height
0,20001,12,7,1412,226.0
1,20002,6,0,857,746.0
2,20003,17,10,1366,1270.0
3,20004,10,0,1752,295.0
4,20005,15,8,810,638.5933


In [8]:
Screen.dtypes

ScreenID       int64
sc_h           int64
sc_w           int64
px_width       int64
px_height    float64
dtype: object

In [3]:
Memory.head()

Unnamed: 0,MemoryID,ram,int_memory
0,30001,3476,5.0
1,30002,3895,61.0
2,30003,2396,27.0
3,30004,3893,25.0
4,30005,1773,49.0


In [9]:
Memory.dtypes

MemoryID        int64
ram             int64
int_memory    float64
dtype: object

In [5]:
TechSpec.head()

Unnamed: 0,TechSpecID,mobile_wt,m_dep,battery_power,touch_screen,clock_speed,n_cores,wifi,blue,dual_sim,four_g,three_g,talk_time
0,40001,193,0.1,1043,True,1.8,3,False,True,True,False,False,2
1,40002,191,0.8,841,False,0.5,5,False,True,True,True,True,7
2,40003,186,0.9,1807,True,2.8,3,True,True,False,False,False,10
3,40004,96,0.5,1546,True,0.5,8,False,False,True,True,True,7
4,40005,108,0.5,1434,False,1.4,6,True,False,False,True,True,7


In [10]:
TechSpec.dtypes

TechSpecID         int64
mobile_wt          int64
m_dep            float64
battery_power      int64
touch_screen        bool
clock_speed      float64
n_cores            int64
wifi                bool
blue                bool
dual_sim            bool
four_g              bool
three_g             bool
talk_time          int64
dtype: object

In [6]:
Sales.head()

Unnamed: 0,SalesID,date,price_range,CameraID,ScreenID,MemoryID,TechSpecID
0,50001,11/2/2022,2,10001,20001,30001,40001
1,50002,11/2/2022,2,10002,20002,30002,40002
2,50003,11/2/2022,2,10003,20003,30003,40003
3,50004,11/2/2022,2,10004,20004,30004,40004
4,50005,11/2/2022,2,10005,20005,30005,40005


In [11]:
Sales.dtypes

SalesID         int64
date           object
price_range     int64
CameraID        int64
ScreenID        int64
MemoryID        int64
TechSpecID      int64
dtype: object

**Implement Featuretools**

In [14]:
import featuretools as ft

In [15]:
#denormalize the table
denormalized = Sales.join(Camera.set_index('CameraID'), on="CameraID")
denormalized = denormalized.join(Screen.set_index("ScreenID"), on="ScreenID")
denormalized = denormalized.join(Memory.set_index("MemoryID"), on="MemoryID")
denormalized = denormalized.join(TechSpec.set_index("TechSpecID"), on="TechSpecID")
denormalized = denormalized.reindex(columns = ["SalesID", "CameraID", "ScreenID", "MemoryID", "TechSpecID", 
                                               "pc", "fc", "sc_h", "sc_w", "px_width", "px_height", "ram",
                                               "int_memory", "mobile_wt", "m_dep", "battery_power", "touch_screen",
                                               "clock_speed", "n_cores", "wifi", "blue", "dual_sim",
                                               "four_g", "three_g", "talk_time", "date", "price_range"])

#creating an entity set 'mobile'
mobile = ft.EntitySet(id="mobile")

#adding a entities || adding dataframe
mobile.add_dataframe(dataframe_name="Sales", dataframe = denormalized, index='SalesID')

Entityset: mobile
  DataFrames:
    Sales [Rows: 3001, Columns: 27]
  Relationships:
    No relationships

**Establish Relationship**

In [17]:
#normalize the dataset into multiple tables
#creating more entities based on relationships between entities
mobile.normalize_dataframe(base_dataframe_name="Sales", new_dataframe_name="Camera", index="CameraID",
                              additional_columns = ["pc", "fc"])

mobile.normalize_dataframe(base_dataframe_name="Sales", new_dataframe_name="Screen", index="ScreenID",
                              additional_columns = ["sc_h", "sc_w", "px_width", "px_height"])

mobile.normalize_dataframe(base_dataframe_name="Sales", new_dataframe_name="Memory", index="MemoryID",
                              additional_columns = ["ram", "int_memory"])

mobile.normalize_dataframe(base_dataframe_name="Sales", new_dataframe_name="TechSpec", index="TechSpecID",
                              additional_columns = ["mobile_wt", "m_dep", "battery_power", "touch_screen",
                                                    "clock_speed", "n_cores", "wifi", "blue", "dual_sim",
                                                    "four_g", "three_g", "talk_time"])

Entityset: mobile
  DataFrames:
    Sales [Rows: 3001, Columns: 7]
    Camera [Rows: 3001, Columns: 3]
    Screen [Rows: 3001, Columns: 5]
    Memory [Rows: 3001, Columns: 3]
    TechSpec [Rows: 3001, Columns: 13]
  Relationships:
    Sales.CameraID -> Camera.CameraID
    Sales.ScreenID -> Screen.ScreenID
    Sales.MemoryID -> Memory.MemoryID
    Sales.TechSpecID -> TechSpec.TechSpecID

**Perform Deep Feature Synthesis**

In [26]:
#Target_dataframe_name = "Sales"
Sales_feature_matrix, Sales_feature_defs = ft.dfs(
    entityset = mobile,
    target_dataframe_name="Sales",
    max_depth = 2,
    n_jobs = 3
)

EntitySet scattered to 3 workers in 3 seconds


In [27]:
def display_tb (feature_matrix_num = 0, first_n = 5):
  feature_matrixes = [Sales_feature_matrix]
  name = ["Sales"]

  print(f"Displaying \"{name[feature_matrix_num]}\"")

  print("____________________________________________________________________________")
  print("All suggested columns")
  for n in list(feature_matrixes[feature_matrix_num].columns):
    print(n)

  n=first_n
  print("____________________________________________________________________________")
  print(f"\n\nLet's examine the feature matrix generated")
  display(feature_matrixes[feature_matrix_num])

In [28]:
display_tb()

Displaying "Sales"
____________________________________________________________________________
All suggested columns
CameraID
ScreenID
MemoryID
TechSpecID
date
price_range
Camera.pc
Camera.fc
Screen.sc_h
Screen.sc_w
Screen.px_width
Screen.px_height
Memory.ram
Memory.int_memory
TechSpec.mobile_wt
TechSpec.m_dep
TechSpec.battery_power
TechSpec.touch_screen
TechSpec.clock_speed
TechSpec.n_cores
TechSpec.wifi
TechSpec.blue
TechSpec.dual_sim
TechSpec.four_g
TechSpec.three_g
TechSpec.talk_time
Camera.COUNT(Sales)
Camera.MAX(Sales.price_range)
Camera.MEAN(Sales.price_range)
Camera.MIN(Sales.price_range)
Camera.MODE(Sales.date)
Camera.NUM_UNIQUE(Sales.date)
Camera.SKEW(Sales.price_range)
Camera.STD(Sales.price_range)
Camera.SUM(Sales.price_range)
Screen.COUNT(Sales)
Screen.MAX(Sales.price_range)
Screen.MEAN(Sales.price_range)
Screen.MIN(Sales.price_range)
Screen.MODE(Sales.date)
Screen.NUM_UNIQUE(Sales.date)
Screen.SKEW(Sales.price_range)
Screen.STD(Sales.price_range)
Screen.SUM(Sales.price_r

Unnamed: 0_level_0,CameraID,ScreenID,MemoryID,TechSpecID,date,price_range,Camera.pc,Camera.fc,Screen.sc_h,Screen.sc_w,...,Memory.SUM(Sales.price_range),TechSpec.COUNT(Sales),TechSpec.MAX(Sales.price_range),TechSpec.MEAN(Sales.price_range),TechSpec.MIN(Sales.price_range),TechSpec.MODE(Sales.date),TechSpec.NUM_UNIQUE(Sales.date),TechSpec.SKEW(Sales.price_range),TechSpec.STD(Sales.price_range),TechSpec.SUM(Sales.price_range)
SalesID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
50001,10001,20001,30001,40001,11/2/2022,2,16,14,12,7,...,2.0,1,2.0,2.0,2.0,11/2/2022,1,,,2.0
50002,10002,20002,30002,40002,11/2/2022,2,12,4,6,0,...,2.0,1,2.0,2.0,2.0,11/2/2022,1,,,2.0
50003,10003,20003,30003,40003,11/2/2022,2,4,1,17,10,...,2.0,1,2.0,2.0,2.0,11/2/2022,1,,,2.0
50004,10004,20004,30004,40004,11/2/2022,2,20,18,10,0,...,2.0,1,2.0,2.0,2.0,11/2/2022,1,,,2.0
50005,10005,20005,30005,40005,11/2/2022,2,18,11,15,8,...,2.0,1,2.0,2.0,2.0,11/2/2022,1,,,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52997,12997,22997,32997,42997,19/03/2022,0,5,4,18,10,...,0.0,1,0.0,0.0,0.0,19/03/2022,1,,,0.0
52998,12998,22998,32998,42998,19/03/2022,3,16,5,19,4,...,3.0,1,3.0,3.0,3.0,19/03/2022,1,,,3.0
52999,12999,22999,32999,42999,19/03/2022,1,16,9,7,3,...,1.0,1,1.0,1.0,1.0,19/03/2022,1,,,1.0
53000,13000,23000,33000,43000,19/03/2022,1,20,8,10,0,...,1.0,1,1.0,1.0,1.0,19/03/2022,1,,,1.0


In [31]:
# Save the dataset in csv file
Sales_feature_matrix.to_csv('mobile_after_featuretools.csv', index=False)