# Working with CSV

In [1]:
import csv

In [2]:
# open file
file = open(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\hotel.csv", "r")

# read all data in this file
data = csv.reader(file)

# define list
result = []

# print each row in terminal
for row in data:
    result.append(row)

# close files
file.close()

print(result)

[['id', 'hotel', 'location', 'pricepernight'], ['1', 'Ideo', 'Bangkok', '20'], ['2', 'Accor', 'London', '25'], ['3', 'Premium', 'Seoul', '32'], ['4', 'Super Star', 'Dubai', '50'], ['5', 'Planet', 'Tokyo', '22'], ['6', 'Joby', 'Bangkok', '30'], ['7', 'JW Marriot', 'Bangkok', '35'], ['8', 'Kenshin', 'Tokyo', '20'], ['9', 'Eloquent', 'London', '42'], ['10', 'Big Ben', 'London', '45']]


In [3]:
print(result[0])

['id', 'hotel', 'location', 'pricepernight']


In [4]:
# context manager .. with

result = []
try:
    with open(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\hotel.csv", "r") as file:
        data = csv.reader(file)
        for row in data:
            result.append(row)

except FileNotFoundError:
    print("File not found")

print(result)

[['id', 'hotel', 'location', 'pricepernight'], ['1', 'Ideo', 'Bangkok', '20'], ['2', 'Accor', 'London', '25'], ['3', 'Premium', 'Seoul', '32'], ['4', 'Super Star', 'Dubai', '50'], ['5', 'Planet', 'Tokyo', '22'], ['6', 'Joby', 'Bangkok', '30'], ['7', 'JW Marriot', 'Bangkok', '35'], ['8', 'Kenshin', 'Tokyo', '20'], ['9', 'Eloquent', 'London', '42'], ['10', 'Big Ben', 'London', '45']]


In [5]:
# write a new csv file

with open (r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\hello.csv", "w") as file:
    writter = csv.writer(file)
    writter.writerow(["id", "name", "city"])
    writter.writerow(["1", "Gun", "Bangkok"])
    writter.writerow(["2", "Hoon", "PRB"])
    writter.writerow(["3", "Frank", "KMITL"])

In [6]:
headers = ["id", "name", "number_students"]
data = [
    [1 , "CU", 15000],
    [2 , "TU", 9800],
    [3 , "KU", 12500]
]

with open (r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\school.csv", "w") as file:
    writter = csv.writer(file)
    writter.writerow(headers)
    writter.writerows(data)

# Class Review : CSV monster

In [7]:
# Object Oriented Programming
# OOP Language

class Book:
    def __init__(self, name, author, year):
        self.name = name
        self.author = author
        self.year = year

In [8]:
# create new instance

my_book = Book("R for Data Science", "Hadley Wickham", 2018)

In [9]:
print(my_book.name)
print(my_book.author)
print(my_book.year)

R for Data Science
Hadley Wickham
2018


In [10]:
friend_book = Book("Python for Beginner", "Toy", 2022)
print(friend_book.name)
print(friend_book.author)
print(friend_book.year)

Python for Beginner
Toy
2022


# CSV monster to Analyze Data

In [11]:
class CSVmonster:
    def __init__(self):
        self.list_data = []

    # 1) read csv file
    def read_csv(self, filename):
        try:
            with open(filename, "r") as file:
                data = csv.reader(file)
                for row in data:
                    self.list_data.append(row)  
                
        except FileNotFoundError:
            print("File not found")
        
        else:
            print("Data Loaded Successfully")
    
    # 2) Print Head
    def head(self, n=5):
        header = self.list_data[0]
        data = self.list_data[1:n+1]
        print(header)
        for row in data:
            print(row)

    # 3 Print Tail
    def tail(self, n=5):
        header = self.list_data[0]
        data = self.list_data[-n:]
        print(header)
        for row in data:
            print(row)

    # 4 Filter Data
    def filter_city(self, city):
        header = self.list_data[0]
        print(header)
        result = []
        for row in self.list_data[1:]:
            if row[2] == city:
                result.append(row)
                print(row)

    # 5 Find average price per night
    def find_mean_price(self):
        list_price = []
        for row in self.list_data[1:]:
            list_price.append(int(row[3]))
        
        avg_price = sum(list_price) / len(list_price)
        print(f'Average Price: {avg_price} USD per night')
        print(f'Max Price: {max(list_price)} USD')
        print(f'Max Price: {min(list_price)} USD')

In [12]:
monster = CSVmonster()

monster.read_csv(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\hotel.csv")

Data Loaded Successfully


In [13]:
monster.list_data

[['id', 'hotel', 'location', 'pricepernight'],
 ['1', 'Ideo', 'Bangkok', '20'],
 ['2', 'Accor', 'London', '25'],
 ['3', 'Premium', 'Seoul', '32'],
 ['4', 'Super Star', 'Dubai', '50'],
 ['5', 'Planet', 'Tokyo', '22'],
 ['6', 'Joby', 'Bangkok', '30'],
 ['7', 'JW Marriot', 'Bangkok', '35'],
 ['8', 'Kenshin', 'Tokyo', '20'],
 ['9', 'Eloquent', 'London', '42'],
 ['10', 'Big Ben', 'London', '45']]

In [14]:
monster.head(3) # default n = 5

['id', 'hotel', 'location', 'pricepernight']
['1', 'Ideo', 'Bangkok', '20']
['2', 'Accor', 'London', '25']
['3', 'Premium', 'Seoul', '32']


In [15]:
monster.tail(3)

['id', 'hotel', 'location', 'pricepernight']
['8', 'Kenshin', 'Tokyo', '20']
['9', 'Eloquent', 'London', '42']
['10', 'Big Ben', 'London', '45']


In [16]:
monster.filter_city("Bangkok")

['id', 'hotel', 'location', 'pricepernight']
['1', 'Ideo', 'Bangkok', '20']
['6', 'Joby', 'Bangkok', '30']
['7', 'JW Marriot', 'Bangkok', '35']


In [17]:
monster.find_mean_price()

Average Price: 32.1 USD per night
Max Price: 50 USD
Max Price: 20 USD


# Intro to Pandas / Numpy

In [18]:
import pandas as pd # Dataframe
import numpy as np # Numerical python / computing

In [19]:
x = np.array([1, 2, 3])
type(x)

numpy.ndarray

In [20]:
x * 5

array([ 5, 10, 15])

In [21]:
np.array([1, 2, 3]) ** 3 

array([ 1,  8, 27], dtype=int32)

In [22]:
# 2D array

y = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

In [23]:
x = np.array([1, 15, 20, 8, 9])

In [24]:
print(x.mean(), x.sum(), x.max(), x.min(), x.std())

10.6 53 20 1 6.468384651518491


In [25]:
np.median(x)

9.0

In [26]:
# pandas

data = {
    "id" : [1, 2, 3, 4],
    "name" : ["Toy", "Ink", "Ball", "Top"],
    "city" : ["BKK", "KOR", "JAP", "USA"]
}

In [27]:
df = pd.DataFrame(data)
df

Unnamed: 0,id,name,city
0,1,Toy,BKK
1,2,Ink,KOR
2,3,Ball,JAP
3,4,Top,USA


In [28]:
# Read CSV file from url

df = pd.read_csv("https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv")
df

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [29]:
type(df)

pandas.core.frame.DataFrame

In [30]:
df.head(3)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1


In [31]:
df.tail(3)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
29,Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
30,Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
31,Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   model   32 non-null     object 
 1   mpg     32 non-null     float64
 2   cyl     32 non-null     int64  
 3   disp    32 non-null     float64
 4   hp      32 non-null     int64  
 5   drat    32 non-null     float64
 6   wt      32 non-null     float64
 7   qsec    32 non-null     float64
 8   vs      32 non-null     int64  
 9   am      32 non-null     int64  
 10  gear    32 non-null     int64  
 11  carb    32 non-null     int64  
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB


In [33]:
df.shape # dataframe attribute

(32, 12)

In [34]:
# summary statistic
df.describe()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


# Read CSV Sample-SuperStore

In [35]:
# Read CSV from drive / local

store = pd.read_csv(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\Sample SuperStore.csv")

In [36]:
store.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [37]:
# Clean column names

column_names = list(store.columns)

clean_names = []
for col in column_names:
    clean_names.append(col.replace(" ", "_").lower().replace("-", "_"))

print(clean_names)

['row_id', 'order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_id', 'customer_name', 'segment', 'country', 'city', 'state', 'postal_code', 'region', 'product_id', 'category', 'sub_category', 'product_name', 'sales', 'quantity', 'discount', 'profit']


In [38]:
# List Comprehension

clean_names_v2 = [col.replace(" ", "_").lower().replace("-", "_") for col in column_names]
print(clean_names_v2)

['row_id', 'order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_id', 'customer_name', 'segment', 'country', 'city', 'state', 'postal_code', 'region', 'product_id', 'category', 'sub_category', 'product_name', 'sales', 'quantity', 'discount', 'profit']


In [39]:
# Assign new column names

store.columns = clean_names_v2
store.head()

Unnamed: 0,row_id,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,...,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [40]:
# Select column

column_i_want = ['order_date', 
                'customer_name',
                'segment',
                'country',
                'city']

store2 = store[column_i_want]
store2.head()

Unnamed: 0,order_date,customer_name,segment,country,city
0,11/8/2016,Claire Gute,Consumer,United States,Henderson
1,11/8/2016,Claire Gute,Consumer,United States,Henderson
2,6/12/2016,Darrin Van Huff,Corporate,United States,Los Angeles
3,10/11/2015,Sean O'Donnell,Consumer,United States,Fort Lauderdale
4,10/11/2015,Sean O'Donnell,Consumer,United States,Fort Lauderdale


In [41]:
# Filter Rows

store2.query("city == 'Los Angeles'").head()

Unnamed: 0,order_date,customer_name,segment,country,city
2,6/12/2016,Darrin Van Huff,Corporate,United States,Los Angeles
5,6/9/2014,Brosina Hoffman,Consumer,United States,Los Angeles
6,6/9/2014,Brosina Hoffman,Consumer,United States,Los Angeles
7,6/9/2014,Brosina Hoffman,Consumer,United States,Los Angeles
8,6/9/2014,Brosina Hoffman,Consumer,United States,Los Angeles


In [42]:
# Combine select column + filter
# recommend filter before select

store.query("city == 'Los Angeles'")[['order_date', 'customer_name']].head()

Unnamed: 0,order_date,customer_name
2,6/12/2016,Darrin Van Huff
5,6/9/2014,Brosina Hoffman
6,6/9/2014,Brosina Hoffman
7,6/9/2014,Brosina Hoffman
8,6/9/2014,Brosina Hoffman


In [43]:
# Query multiple conditions

print(store.query("city == 'Henderson' & segment == 'Consumer'").shape)
print(store.query("city == 'Henderson' | segment == 'Consumer'").shape)

(22, 21)
(5220, 21)


In [44]:
# Value Counts
# Top 10 cities with most orders

store['city'].value_counts().head(10)

New York City    915
Los Angeles      747
Philadelphia     537
San Francisco    510
Seattle          428
Houston          377
Chicago          314
Columbus         222
San Diego        170
Springfield      163
Name: city, dtype: int64

In [45]:
store.columns

Index(['row_id', 'order_id', 'order_date', 'ship_date', 'ship_mode',
       'customer_id', 'customer_name', 'segment', 'country', 'city', 'state',
       'postal_code', 'region', 'product_id', 'category', 'sub_category',
       'product_name', 'sales', 'quantity', 'discount', 'profit'],
      dtype='object')

In [46]:
# Dataframe Aggregation
# SQL Aggregate Function
# AVG SUM MIN MAX COUNT

result = store.groupby(['city', 'segment'])[['sales', 'profit']]\
        .agg(['mean', 'sum', 'max', 'min', 'count'])\
        .reset_index()

In [47]:
result.to_csv(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\agg_store.csv", index=False)

In [48]:
# Create a new column

df = store[['country', 'city', 'sales']].head()

df['sales_add_vat'] = df['sales'] * 1.15

# Drop columns
df = df.drop(columns = ['country', 'city'])
df

Unnamed: 0,sales,sales_add_vat
0,261.96,301.254
1,731.94,841.731
2,14.62,16.813
3,957.5775,1101.214125
4,22.368,25.7232


# Request API

In [49]:
# API -> Application Programming Interface

import requests
import time

In [50]:
# API endpoint

url = "https://swapi.dev/api/people/"

In [51]:
# Loop request data

result = []
for i in range(10):
    new_url = url + str(i+1)
    response = requests.get(new_url)
    response_json = response.json() # json == dict
    data = [
        response_json['name'],
        response_json['height'],
        response_json['mass'],
        response_json['birth_year']
    ]
    result.append(data)
    time.sleep(1) # delay 1 sec

print(result)

[['Luke Skywalker', '172', '77', '19BBY'], ['C-3PO', '167', '75', '112BBY'], ['R2-D2', '96', '32', '33BBY'], ['Darth Vader', '202', '136', '41.9BBY'], ['Leia Organa', '150', '49', '19BBY'], ['Owen Lars', '178', '120', '52BBY'], ['Beru Whitesun lars', '165', '75', '47BBY'], ['R5-D4', '97', '32', 'unknown'], ['Biggs Darklighter', '183', '84', '24BBY'], ['Obi-Wan Kenobi', '182', '77', '57BBY']]


In [52]:
# Create Dataframe

header = ['Name', 'Height', 'Mass', 'Birth Year']
starwars = pd.DataFrame(result, columns = header)
starwars

Unnamed: 0,Name,Height,Mass,Birth Year
0,Luke Skywalker,172,77,19BBY
1,C-3PO,167,75,112BBY
2,R2-D2,96,32,33BBY
3,Darth Vader,202,136,41.9BBY
4,Leia Organa,150,49,19BBY
5,Owen Lars,178,120,52BBY
6,Beru Whitesun lars,165,75,47BBY
7,R5-D4,97,32,unknown
8,Biggs Darklighter,183,84,24BBY
9,Obi-Wan Kenobi,182,77,57BBY


In [53]:
starwars.to_csv(r"C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\starwars.csv", index=False)

# Basic Web Scraping

In [54]:
import requests
from gazpacho import Soup

In [55]:
url = "https://www.imdb.com/search/title/?groups=top_100&sort=user_rating,desc"

In [56]:
response = requests.get(url)
response.status_code

200

In [57]:
# Soup Object

imdb = Soup(response.text)

In [58]:
# Find h3

imdb.find('h3', {'class' : 'lister-item-header'}, mode = "first").strip() # only first pull, preview data # strip to delete tag name

'1. The Shawshank Redemption (1994)'

In [59]:
# Get title movie name
titles = imdb.find('h3', {'class' : 'lister-item-header'})

# List Comprehension
clean_titles = [title.strip() for title in titles]

# for title in titles:
#     print(title.strip())

print(clean_titles[0:3])

['1. The Shawshank Redemption (1994)', '2. The Godfather (1972)', '3. The Dark Knight (2008)']


In [60]:
# Get runtime
runtimes = imdb.find('span', {'class' : 'runtime'})
clean_runtimes = [int(runtime.strip().replace("min", "")) for runtime in runtimes]
print(clean_runtimes)

[142, 175, 152, 201, 195, 202, 96, 178, 154, 148, 179, 139, 142, 161, 169, 136, 145, 124, 133, 207, 130, 148, 130, 150, 125, 169, 189, 116, 127, 110, 137, 118, 116, 121, 133, 132, 149, 106, 165, 112, 170, 130, 151, 113, 155, 119, 106, 88, 155, 89]


In [61]:
# Create Dataframe

imdb_df = pd.DataFrame(
    {
        "title" : clean_titles,
        "runtime" : clean_runtimes
    }
)

In [62]:
imdb_df.head(10)

Unnamed: 0,title,runtime
0,1. The Shawshank Redemption (1994),142
1,2. The Godfather (1972),175
2,3. The Dark Knight (2008),152
3,4. The Lord of the Rings: The Return of the Ki...,201
4,5. Schindler's List (1993),195
5,6. The Godfather: Part II (1974),202
6,7. 12 Angry Men (1957),96
7,8. The Lord of the Rings: The Fellowship of th...,178
8,9. Pulp Fiction (1994),154
9,10. Inception (2010),148


# Working with SQLite Database

In [63]:
import sqlite3

In [64]:
# Create Connection

con = sqlite3.connect("employee.db")

# Create Cusor, execute SQL commands

cur = con.cursor()

cur.execute("create table department (id int, dept text)")

cur.execute("""insert into department values 
            (1, 'Data'),
            (2, 'Finance'),
            (3, 'Marketing')
            """)

con.commit() # commit change

con.close

<function Connection.close>

In [65]:
# Get data from employee.db

con = sqlite3.connect('employee.db')
cur = con.cursor()
cur.execute("select * from department")
result = cur.fetchall()

for row in result:
    print(row)
    
con.close()

(1, 'Data')
(2, 'Finance')
(3, 'Marketing')


In [66]:
# Get data from chinook.db

# Create Connection
con = sqlite3.connect(r'C:\Users\gunsr\Desktop\Programming\Git Remote\Data-Science-Bootcamp\Sprint 10 - Essential Python for Data Analyst\Data Resource\chinook.db')

In [67]:
customer = pd.read_sql_query("select * from customers limit 5", con)

print(customer)

con.close

   CustomerId  FirstName     LastName  \
0           1       Luís    Gonçalves   
1           2     Leonie       Köhler   
2           3   François     Tremblay   
3           4      Bjørn       Hansen   
4           5  František  Wichterlová   

                                            Company  \
0  Embraer - Empresa Brasileira de Aeronáutica S.A.   
1                                              None   
2                                              None   
3                                              None   
4                                  JetBrains s.r.o.   

                           Address                 City State         Country  \
0  Av. Brigadeiro Faria Lima, 2170  São José dos Campos    SP          Brazil   
1          Theodor-Heuss-Straße 34            Stuttgart  None         Germany   
2                1498 rue Bélanger             Montréal    QC          Canada   
3                 Ullevålsveien 14                 Oslo  None          Norway   
4                

<function Connection.close>