# Import libraries

In [2]:
import requests
import pandas as pd

# Extract, Transform, Load (ETL)

## Extract Data
The dataset contains a list of top-level domain names, along with their types and sponsoring organisations.


In [4]:
url = "https://datahub.io/core/top-level-domain-names/r/top-level-domain-names.csv.csv"

# Send a GET request to the URL.
# requests applies no timeout by default, so without one this cell could hang
# forever on an unresponsive server; 30 seconds is ample for a small CSV.
response = requests.get(url, timeout=30)

# Only write the payload to disk when the server answered 200 OK;
# otherwise report the status code and do not touch data.csv.
if response.status_code == 200:
    with open("data.csv", "wb") as file:
        file.write(response.content)
    print("Success! File downloaded")
else:
    print("Failed to download file:", response.status_code)


Success! File downloaded


## Transform Data
We want a dataset that contains only **generic** top-level domains. In the following code cells, we transform the extracted data so that the dataframe contains only rows whose `Type` is `generic`.

In [5]:
# Read the downloaded file ('data.csv') into a dataframe
df = pd.read_csv('data.csv')
# Show the dataframe as the cell output
df

Unnamed: 0,Domain,Type,Sponsoring Organisation
0,.abbott,generic,"Abbott Laboratories, Inc."
1,.abogado,generic,Top Level Domain Holdings Limited
2,.ac,country-code,Network Information Center (AC Domain Registry...
3,.academy,generic,"Half Oaks, LLC"
4,.accountant,generic,dot Accountant Limited
...,...,...,...
914,.zip,generic,Charleston Road Registry Inc.
915,.zm,country-code,Zambia Information and Communications Technolo...
916,.zone,generic,"Outer Falls, LLC"
917,.zuerich,generic,Kanton Zürich (Canton of Zurich)


In [9]:
# Keep only the rows whose 'Type' is 'generic'.
# .copy() makes generic_df an independent dataframe rather than a slice of `df`,
# so adding columns to it later does not trigger pandas' SettingWithCopyWarning.
generic_df = df[df['Type'] == 'generic'].copy()
generic_df

Unnamed: 0,Domain,Type,Sponsoring Organisation
0,.abbott,generic,"Abbott Laboratories, Inc."
1,.abogado,generic,Top Level Domain Holdings Limited
3,.academy,generic,"Half Oaks, LLC"
4,.accountant,generic,dot Accountant Limited
5,.accountants,generic,"Knob Town, LLC"
...,...,...,...
910,.yokohama,generic,"GMO Registry, Inc."
911,.youtube,generic,Charleston Road Registry Inc.
914,.zip,generic,Charleston Road Registry Inc.
916,.zone,generic,"Outer Falls, LLC"


In [None]:
from datetime import datetime, date

today = date.today()

# Add a 'Date' column so that other users know when this data was extracted.
# .assign returns a new dataframe instead of writing into generic_df in place,
# which avoids SettingWithCopyWarning when generic_df is a filtered slice of `df`
# and keeps this cell safe to re-run.
generic_df = generic_df.assign(Date=today.strftime('%Y-%m-%d'))

In [14]:
# Display generic_df to check the newly added 'Date' column
generic_df

Unnamed: 0,Domain,Type,Sponsoring Organisation,Date
0,.abbott,generic,"Abbott Laboratories, Inc.",2024-02-19
1,.abogado,generic,Top Level Domain Holdings Limited,2024-02-19
3,.academy,generic,"Half Oaks, LLC",2024-02-19
4,.accountant,generic,dot Accountant Limited,2024-02-19
5,.accountants,generic,"Knob Town, LLC",2024-02-19
...,...,...,...,...
910,.yokohama,generic,"GMO Registry, Inc.",2024-02-19
911,.youtube,generic,Charleston Road Registry Inc.,2024-02-19
914,.zip,generic,Charleston Road Registry Inc.,2024-02-19
916,.zone,generic,"Outer Falls, LLC",2024-02-19


In [15]:
# Save the transformed dataframe to CSV; index=False leaves the row index out of the file
generic_df.to_csv('transformed_data.csv', index=False)

## Load Data
We will load the transformed data into a SQLite database. Using the `sqlalchemy` library, we can connect to the database and write the dataframe to a table.

In [3]:
# Reload the transformed data from disk.
# Note: this rebinds `df`, replacing the raw dataframe read from 'data.csv' earlier.
df = pd.read_csv('transformed_data.csv')

In [4]:
from sqlalchemy import create_engine

# Create an engine for the SQLite database file 'database.db'
engine = create_engine('sqlite:///database.db')

# Store the dataframe in a table named 'data': an existing table of that name
# is replaced, and the dataframe's row index is not written as a column.
df.to_sql(
    name='data',
    con=engine,
    if_exists='replace',
    index=False,
)

590