# **Create, Load and Query a Dataset on a SQL Database**

# Loading necessary packages and changing working directory

- The working directory is the folder where the dataset is stored

In [None]:
import os
# Folder that holds the CSV file and where the SQLite database file is created.
# Defaults to the original author's local folder; set the ECOM_DATA_DIR
# environment variable to run the notebook on another machine without editing
# this cell.
DATA_DIR = os.environ.get('ECOM_DATA_DIR', 'C:/Users/luisp/Desktop/Biz/Datasets portefolio')
# The relative file names used later (CSV, .db) resolve against this directory.
os.chdir(DATA_DIR)

import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

# Loading the CSV file using Pandas and showing the first 5 rows

In [3]:
# Read the customer-behaviour CSV (looked up relative to the working directory)
# into a DataFrame.
df = pd.read_csv(filepath_or_buffer='E-commerce_Customer_Behavior.csv')
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Customer ID,Gender,Age,City,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level
0,101,Female,29,New York,Gold,1120.2,14,4.6,True,25,Satisfied
1,102,Male,34,Los Angeles,Silver,780.5,11,4.1,False,18,Neutral
2,103,Female,43,Chicago,Bronze,510.75,9,3.4,True,42,Unsatisfied
3,104,Male,30,San Francisco,Gold,1480.3,19,4.7,False,12,Satisfied
4,105,Male,27,Miami,Silver,720.4,13,4.0,True,55,Unsatisfied


# Replace the spaces in the column names with underscores to make the columns easier to query

In [28]:
# Swap every space in the column labels for an underscore, e.g.
# 'Customer ID' becomes 'Customer_ID'.
df.columns = [label.replace(' ', '_') for label in df.columns]
# Preview the first five rows with the renamed columns.
df.head(n=5)

Unnamed: 0,Customer_ID,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
0,101,Female,29,New York,Gold,1120.2,14,4.6,True,25,Satisfied
1,102,Male,34,Los Angeles,Silver,780.5,11,4.1,False,18,Neutral
2,103,Female,43,Chicago,Bronze,510.75,9,3.4,True,42,Unsatisfied
3,104,Male,30,San Francisco,Gold,1480.3,19,4.7,False,12,Satisfied
4,105,Male,27,Miami,Silver,720.4,13,4.0,True,55,Unsatisfied


# Creating a connection object to the SQL Database

In [33]:
# Open a connection to the SQLite database file in the working directory
# (sqlite3 creates the file if it does not exist yet).
conn = sqlite3.connect(database='portefolio_database.db')

# Loading the dataset to the Database

In [34]:
# Write the DataFrame to the 'ecom_data' table, replacing any existing table of
# that name; index=False keeps the pandas row index out of the table.
# try/finally guarantees the connection is closed even when the write raises,
# so a failed load does not leave an open handle on the database file.
try:
    df.to_sql('ecom_data', con=conn, if_exists='replace', index=False)
finally:
    conn.close()

# Connecting to the SQLite database using the `%sql` magic (provided by the `sql` IPython extension)

In [32]:
# Load the `sql` IPython extension so the %sql magic is available in later cells.
%load_ext sql
# Set the extension's SqlMagic.style option to '_DEPRECATED_DEFAULT'.
# NOTE(review): presumably a workaround for a result-table style incompatibility
# between the extension and its table-rendering dependency — confirm.
%config SqlMagic.style = '_DEPRECATED_DEFAULT'
# Connect %sql to the SQLite database file (relative path, so it is resolved
# against the current working directory).
%sql sqlite:///portefolio_database.db

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


# Selecting all the data from the ecom_data table 

In [35]:
# Return every column of every row in the ecom_data table.
%sql "SELECT * FROM ecom_data"

 * sqlite:///portefolio_database.db
Done.


Customer_ID,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
101,Female,29,New York,Gold,1120.2,14,4.6,1,25,Satisfied
102,Male,34,Los Angeles,Silver,780.5,11,4.1,0,18,Neutral
103,Female,43,Chicago,Bronze,510.75,9,3.4,1,42,Unsatisfied
104,Male,30,San Francisco,Gold,1480.3,19,4.7,0,12,Satisfied
105,Male,27,Miami,Silver,720.4,13,4.0,1,55,Unsatisfied
106,Female,37,Houston,Bronze,440.8,8,3.1,0,22,Neutral
107,Female,31,New York,Gold,1150.6,15,4.5,1,28,Satisfied
108,Male,35,Los Angeles,Silver,800.9,12,4.2,0,14,Neutral
109,Female,41,Chicago,Bronze,495.25,10,3.6,1,40,Unsatisfied
110,Male,28,San Francisco,Gold,1520.1,21,4.8,0,9,Satisfied


# Selecting all the data from the Age column of the ecom_data table

In [36]:
# Return only the Age column, one value per row of ecom_data.
%sql "SELECT Age FROM ecom_data"

 * sqlite:///portefolio_database.db
Done.


Age
29
34
43
30
27
37
31
35
41
28


# Selecting all the rows of the table that represent females

In [37]:
# Keep only the rows whose Gender value equals 'Female'; all columns are returned.
%sql "SELECT * FROM ecom_data WHERE Gender = 'Female'"

 * sqlite:///portefolio_database.db
Done.


Customer_ID,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
101,Female,29,New York,Gold,1120.2,14,4.6,1,25,Satisfied
103,Female,43,Chicago,Bronze,510.75,9,3.4,1,42,Unsatisfied
106,Female,37,Houston,Bronze,440.8,8,3.1,0,22,Neutral
107,Female,31,New York,Gold,1150.6,15,4.5,1,28,Satisfied
109,Female,41,Chicago,Bronze,495.25,10,3.6,1,40,Unsatisfied
112,Female,36,Houston,Bronze,470.5,7,3.2,0,20,Neutral
113,Female,30,New York,Gold,1200.8,16,4.3,1,21,Satisfied
115,Female,42,Chicago,Bronze,530.4,9,3.5,1,38,Unsatisfied
118,Female,38,Houston,Bronze,450.9,8,3.0,0,25,Neutral
119,Female,32,New York,Gold,1170.3,14,4.7,1,29,Satisfied


# Selecting all the rows that represent females older than 30

In [38]:
# Both conditions must hold: Gender equals 'Female' AND Age is strictly greater
# than 30 (customers aged exactly 30 are excluded).
%sql "SELECT * FROM ecom_data WHERE Gender = 'Female' AND Age > 30"

 * sqlite:///portefolio_database.db
Done.


Customer_ID,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
103,Female,43,Chicago,Bronze,510.75,9,3.4,1,42,Unsatisfied
106,Female,37,Houston,Bronze,440.8,8,3.1,0,22,Neutral
107,Female,31,New York,Gold,1150.6,15,4.5,1,28,Satisfied
109,Female,41,Chicago,Bronze,495.25,10,3.6,1,40,Unsatisfied
112,Female,36,Houston,Bronze,470.5,7,3.2,0,20,Neutral
115,Female,42,Chicago,Bronze,530.4,9,3.5,1,38,Unsatisfied
118,Female,38,Houston,Bronze,450.9,8,3.0,0,25,Neutral
119,Female,32,New York,Gold,1170.3,14,4.7,1,29,Satisfied
121,Female,43,Chicago,Bronze,505.75,10,3.3,1,41,Unsatisfied
124,Female,37,Houston,Bronze,430.8,7,3.4,0,23,Neutral


# Showing all customers from New York and Chicago, ordered by total money spent (lowest first)

In [40]:
# IN (...) matches rows from either city; ORDER BY Total_Spend ASC lists the
# lowest spenders first. Only the three named columns are returned.
%sql "SELECT Customer_ID, City, Total_Spend FROM ecom_data WHERE City IN ('New York', 'Chicago') ORDER BY Total_Spend ASC"

 * sqlite:///portefolio_database.db
Done.


Customer_ID,City,Total_Spend
372,Chicago,475.25
396,Chicago,475.25
420,Chicago,475.25
444,Chicago,475.25
145,Chicago,480.25
175,Chicago,480.25
205,Chicago,480.25
235,Chicago,480.25
247,Chicago,480.25
277,Chicago,480.25


# Showing how many customers are in each city (each row of `ecom_data` is a customer record, so `COUNT(*)` counts customers rather than orders)

In [42]:
# GROUP BY City yields one result row per city; COUNT(*) is the number of table
# rows belonging to that city.
# NOTE(review): the table appears to hold one row per customer (Customer_ID), so
# the `total_orders` alias looks like a count of customers, not orders — confirm
# and consider renaming the alias.
%sql "SELECT City, COUNT(*) AS total_orders FROM ecom_data GROUP BY City"

 * sqlite:///portefolio_database.db
Done.


City,total_orders
Chicago,58
Houston,58
Los Angeles,59
Miami,58
New York,59
San Francisco,58


# Showing the average rating per city

In [44]:
# Per-city mean of the Average_Rating column, rounded to 3 decimal places and
# returned under the alias Average_Rating_City.
%sql "SELECT City, ROUND(AVG(Average_Rating),3) as Average_Rating_City FROM ecom_data GROUP BY City"

 * sqlite:///portefolio_database.db
Done.


City,Average_Rating_City
Chicago,3.457
Houston,3.193
Los Angeles,4.173
Miami,3.928
New York,4.544
San Francisco,4.809
