In [97]:
import pandas as pd
import pyodbc
import sqlite3

# Connection

In [98]:
# Connection settings for the SQL Server database: the server instance name
# and the database name, both used below to build the ODBC connection string.
DB = {"servername": "MSI\\SQLEXPRESS",
      "database": "DEDSDatabase1"}

# Open the ODBC connection. `Trusted_Connection=yes` requests Windows
# integrated authentication, so no username/password appears in the string.
export_conn = pyodbc.connect('DRIVER={SQL SERVER};SERVER=' + DB['servername'] + 
                             ';DATABASE=' + DB['database'] + ';Trusted_Connection=yes')
# Cursor on that connection; nothing in this cell executes a statement yet.
export_cursor = export_conn.cursor()
# Bare expression: the notebook echoes the cursor's repr as the cell output.
export_cursor

<pyodbc.Cursor at 0x243a18a0430>

In [99]:
def select_from(name, connection):
    """Read a whole table into a DataFrame and prefix its column labels.

    Every column is relabelled as ``<NAME>_<column>``: the table name is
    upper-cased, the original column label is lower-cased, and the two are
    joined with an underscore.

    Args:
        name: Table name. It is concatenated verbatim into the SQL text, so
            it must be a trusted identifier.
        connection: Open database connection accepted by
            ``pd.read_sql_query``.

    Returns:
        DataFrame holding all rows of the table with the relabelled columns.
    """
    prefix = name.upper() + "_"
    table = pd.read_sql_query("SELECT * FROM " + name, connection)
    # A callable mapper is applied to each column label in turn.
    return table.rename(columns=lambda label: prefix + label.lower())
      

def find_mutual_columns(dataset1, dataset2):
    """List the entries of ``dataset2`` that also occur in ``dataset1``.

    Both arguments are simply iterated; iterating a DataFrame yields its
    column labels, so passing two DataFrames gives their shared columns.

    Args:
        dataset1: Iterable whose items form the lookup set.
        dataset2: Iterable whose items are tested against that set.

    Returns:
        List of shared items in the order they appear in ``dataset2``.
    """
    seen_in_first = set(dataset1)
    return [label for label in dataset2 if label in seen_in_first]

def merge_differing_columns_simple(dataframe1, dataframe2, key):
    """Left-join onto ``dataframe1`` the columns only ``dataframe2`` has.

    Columns of ``dataframe2`` whose label already exists in ``dataframe1``
    are ignored (apart from the join key), so the values of ``dataframe1``
    win for every shared column.

    Args:
        dataframe1: Base frame; all of its rows and columns are kept.
        dataframe2: Frame supplying the additional columns.
        key: Column label present in both frames, used as the join key.

    Returns:
        A new DataFrame: the columns of ``dataframe1`` followed by the
        additional columns in the order they appear in ``dataframe2``.
    """
    known = set(dataframe1.columns)

    # Walk the columns of the second frame in their original order (not via a
    # set) so the result's column order is the same on every run.
    # dict.fromkeys drops repeated labels while preserving that order.
    extra_columns = [
        column
        for column in dict.fromkeys(dataframe2.columns)
        if column != key and column not in known
    ]

    # Merge the key plus the extra columns onto the first frame.
    return pd.merge(dataframe1, dataframe2.loc[:, [key] + extra_columns], on=key, how='left')

def merge_differing_columns(dataframe1, dataframe2, left_key, right_key):
    """Left-join onto ``dataframe1`` the columns only ``dataframe2`` has.

    Same idea as ``merge_differing_columns_simple``, but the join key may be
    labelled differently in the two frames.

    Args:
        dataframe1: Base frame; all of its rows and columns are kept.
        dataframe2: Frame supplying the additional columns.
        left_key: Join-key column label in ``dataframe1``.
        right_key: Join-key column label in ``dataframe2``.

    Returns:
        A new DataFrame: the columns of ``dataframe1``, then ``right_key``
        (when its label differs from ``left_key``), then the additional
        columns in the order they appear in ``dataframe2``.
    """
    known = set(dataframe1.columns)

    # right_key is always selected exactly once. It is normally absent from
    # dataframe1, so without the explicit exclusion it would be picked up a
    # second time as an "extra" column, and pandas refuses to merge on a
    # column label that is not unique.
    # Iterating the columns in order (not via a set) keeps the output column
    # order stable between runs; dict.fromkeys drops repeated labels.
    extra_columns = [
        column
        for column in dict.fromkeys(dataframe2.columns)
        if column != right_key and column not in known
    ]

    # Merge the key plus the extra columns onto the first frame.
    return pd.merge(
        dataframe1,
        dataframe2.loc[:, [right_key] + extra_columns],
        left_on=left_key,
        right_on=right_key,
        how='left',
    )

In [100]:
# Open the go_sales SQLite database (path is relative to the working directory).
# Note: sqlite3.connect creates a new empty file if the path does not exist, so
# a wrong working directory only surfaces later as a failing SELECT.
connectie_sales = sqlite3.connect('go_sales.sqlite')
# NOTE(review): this table-listing query is defined but never executed in this cell.
sql_query = "SELECT name FROM sqlite_master WHERE type='table';"

# Load each table in full. select_from prefixes every column with the
# upper-cased table name, e.g. "PRODUCT_<column>".
product = select_from("product", connectie_sales)
product_type = select_from("product_type", connectie_sales)
product_line = select_from("product_line", connectie_sales)
# The SALES_ variable prefix marks tables whose name is also loaded from
# another database in a different cell (staff or crm).
SALES_sales_staff = select_from("sales_staff", connectie_sales)
SALES_sales_branch = select_from("sales_branch", connectie_sales)
SALES_retailer_site = select_from("retailer_site", connectie_sales)
SALES_country = select_from("country", connectie_sales)
order_header = select_from("order_header", connectie_sales)
order_details = select_from("order_details", connectie_sales)
# Columns of this frame get the prefix "SALES_TARGETDATA_" because the table
# name is upper-cased as a whole.
target = select_from("SALES_TARGETData", connectie_sales)
returned_item = select_from("returned_item", connectie_sales)
return_reason = select_from("return_reason", connectie_sales)

In [101]:
# Open the go_staff SQLite database (path is relative to the working directory).
connectie_staff = sqlite3.connect('go_staff.sqlite')

# Load each table in full; columns are prefixed with the upper-cased table name.
course = select_from("course", connectie_staff)
# The STAFF_ variable prefix distinguishes these frames from the same-named
# tables loaded from go_sales; the column prefixes themselves are identical
# (e.g. "SALES_STAFF_...") because they derive from the table name only.
STAFF_sales_staff = select_from("sales_staff", connectie_staff)
STAFF_sales_branch = select_from("sales_branch", connectie_staff)
satisfaction = select_from("satisfaction", connectie_staff)
satisfaction_type = select_from("satisfaction_type", connectie_staff)
training = select_from("training", connectie_staff)

In [102]:
# Open the go_crm SQLite database (path is relative to the working directory).
connectie_crm = sqlite3.connect('go_crm.sqlite')

# Load each table in full; columns are prefixed with the upper-cased table name.
age_group = select_from("age_group", connectie_crm)
# The CRM_ variable prefix distinguishes these frames from the same-named
# tables loaded from go_sales.
CRM_country = select_from("country", connectie_crm)
retailer = select_from("retailer", connectie_crm)
retailer_contact = select_from("retailer_contact", connectie_crm)
retailer_headquarters = select_from("retailer_headquarters", connectie_crm)
retailer_segment = select_from("retailer_segment", connectie_crm)
CRM_retailer_site = select_from("retailer_site", connectie_crm)
retailer_type = select_from("retailer_type", connectie_crm)
sales_demographic = select_from("sales_demographic", connectie_crm)
sales_territory = select_from("sales_territory", connectie_crm)

In [103]:
# CSV files: read with pandas defaults from the working directory. These frames
# do not pass through select_from, so their column labels keep the names from
# the CSV header and carry no table-name prefix.
inventory_levels = pd.read_csv('GO_SALES_INVENTORY_LEVELSData.csv')
product_forecast = pd.read_csv('GO_SALES_PRODUCT_FORECASTData.csv')

In [104]:
# Merge similar tables: for each table that exists in two source databases,
# keep the first frame as the base and left-join the columns that only the
# second frame has, matching rows on the shared code column.
# NOTE(review): sales_staff uses the STAFF copy as its base, while the other
# three use the SALES copy -- confirm this asymmetry is intended.
sales_staff = merge_differing_columns_simple(STAFF_sales_staff, SALES_sales_staff, 'SALES_STAFF_sales_staff_code')
sales_branch = merge_differing_columns_simple(SALES_sales_branch, STAFF_sales_branch, 'SALES_BRANCH_sales_branch_code')
country = merge_differing_columns_simple(SALES_country, CRM_country, 'COUNTRY_country_code')
retailer_site = merge_differing_columns_simple(SALES_retailer_site, CRM_retailer_site, 'RETAILER_SITE_retailer_site_code')

In [105]:
# Attach the product-type attributes to every product (left join keeps all
# products), then drop the right-hand key column because it duplicates
# PRODUCT_product_type_code.
output_PRODUCT = pd.merge(
    product,
    product_type,
    left_on='PRODUCT_product_type_code',
    right_on='PRODUCT_TYPE_product_type_code',
    how='left',
).drop(columns='PRODUCT_TYPE_product_type_code')


Unnamed: 0,PRODUCT_product_number,PRODUCT_introduction_date,PRODUCT_product_type_code,PRODUCT_production_cost,PRODUCT_margin,PRODUCT_product_image,PRODUCT_language,PRODUCT_product_name,PRODUCT_description,PRODUCT_trial888,PRODUCT_TYPE_product_line_code,PRODUCT_TYPE_product_type_en,PRODUCT_TYPE_trial888
0,1,15-2-2011,1,4,.33,P01CE1CG1.jpg,EN,TrailChef Water Bag,"Lightweight, collapsible bag to carry liquids ...",T,1,Cooking Gear,T
1,10,15-2-2011,1,10,.4,P10CE1CG1.jpg,EN,TrailChef Utensils,"Spoon, fork and knife set made of a light yet ...",T,1,Cooking Gear,T
2,100,15-2-2011,17,3,.5,P100OP4FA17.jpg,EN,Insect Bite Relief,The Insect Bite Relief helps the itching and s...,T,4,First Aid,T
3,101,15-12-2019,18,305.54,.43,P101GE5IR18.jpg,EN,Hailstorm Steel Irons,Iron is 17-4 stainless steel. Shafts are grap...,T,5,Irons,T
4,102,10-12-2019,18,380.95,.51,P102GE5IR18.jpg,EN,Hailstorm Titanium Irons,Made entirely of pure titanium. The ultimate i...,T,5,Irons,T
...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,95,15-2-2011,16,3,.5,P91OP4SS16.jpg,EN,Sun Shield,"PABA free sunscreen, SPF 30, poison oak and iv...",T,4,Sunscreen,T
111,96,15-2-2011,17,16.43,.28,P96OP4FA17.jpg,EN,Compact Relief Kit,A personal first aid kit is recommended for ev...,T,4,First Aid,T
112,97,5-3-2013,17,25,.28,P96OP4FA17.jpg,EN,Deluxe Family Relief Kit,A complete medical kit suitable for families w...,T,4,First Aid,T
113,98,15-2-2011,17,3,.5,P98OP4FA17.jpg,EN,Calamine Relief,Use the Calamine Relief for allergic skin reac...,T,4,First Aid,T
