In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from getpass import getpass
from mysql.connector import connect, Error
from dotenv import load_dotenv

class MySQL:
    """Thin convenience wrapper around a single ``mysql.connector`` connection.

    The connection is opened lazily against the ``fcc`` database on
    ``localhost`` using the ``USERNAME`` and ``PASS`` environment variables
    (``load_dotenv()`` is called in ``__init__`` so they can come from a
    ``.env`` file). Every query helper prints the MySQL error, closes the
    connection, and then either re-raises the original exception (``run``,
    ``run_multi``) or returns ``None`` (``get_cursor``).
    """

    def __init__(self):
        # No connection yet; connect_client() opens it on first use.
        self.client = None
        # Make values from a local .env file visible through os.getenv().
        # NOTE(review): USERNAME is usually predefined on Windows and
        # load_dotenv() does not override existing variables by default -
        # confirm the intended account is the one actually used.
        load_dotenv()

    def connect_client(self):
        """Return the open connection, creating it first if necessary.

        Returns:
            The ``mysql.connector`` connection stored in ``self.client``.

        Raises:
            mysql.connector.Error: if the connection cannot be established.
                The error is printed and then re-raised unchanged.
        """
        if self.client:
            return self.client

        try:
            self.client = connect(
                host='localhost',
                user=os.getenv('USERNAME'),
                password=os.getenv('PASS'),
                database='fcc',
                # Required for the LOAD DATA LOCAL INFILE statements this
                # notebook sends through run().
                allow_local_infile=True
            )
        except Error as e:
            print(e)
            # Bare raise keeps the original errno/message/traceback;
            # `raise Error` would replace it with an empty exception.
            raise

        return self.client

    def run(self, query):
        """Execute a single statement and commit.

        Args:
            query: SQL text to execute.

        Raises:
            mysql.connector.Error: on failure. The connection is closed and
                the error printed before the original exception is re-raised.
        """
        cli = self.connect_client()

        try:
            with cli.cursor() as cursor:
                cursor.execute(query)
                cli.commit()
        except Error as e:
            self.close_client()
            print(e)
            raise

    def run_multi(self, f_name):
        """Execute every statement in ``./sql/<f_name>`` and commit.

        Args:
            f_name: name of a file inside the ``sql`` directory (resolved
                relative to the current working directory).

        Raises:
            OSError: if the script file cannot be read (raised before any
                connection is opened).
            mysql.connector.Error: on failure. The connection is closed and
                the error printed before the original exception is re-raised.
        """
        script_path = os.path.join(os.path.abspath('./sql'), f_name)
        with open(script_path) as file:
            query = file.read()

        cli = self.connect_client()

        try:
            with cli.cursor() as cursor:
                # With multi=True, execute() hands back an iterator and the
                # statements are only processed as it is consumed; without
                # this loop the script is not fully executed before commit.
                results = cursor.execute(query, multi=True)
                for result in results or ():
                    if result.with_rows:
                        # Drain result sets so the next statement can run.
                        result.fetchall()
                cli.commit()
        except Error as e:
            self.close_client()
            print(e)
            raise

    def get_cursor(self, query):
        """Execute ``query`` and return the open cursor for iteration.

        The caller owns the returned cursor and must close it.

        Args:
            query: SQL text to execute.

        Returns:
            The cursor positioned on the query's result, or ``None`` if the
            query failed (the error is printed and the connection closed).
        """
        cli = self.connect_client()

        try:
            cursor = cli.cursor()
            cursor.execute(query)
            return cursor
        except Error as e:
            self.close_client()
            print(e)
            return None

    def close_client(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if not self.client:
            return

        self.client.close()
        self.client = None
    


In [86]:
# Build the wrapper; its constructor only loads .env values - no connection is
# opened until connect_client() or one of the query helpers is called.
client = MySQL()

In [28]:
# Smoke test for MySQL.run(): make sure the scratch table exists, then insert
# one row into it.
client.connect_client()

# IF NOT EXISTS keeps this cell re-runnable; previously the CREATE statement
# was built but never executed, so the INSERT failed on a fresh database.
query = """
        CREATE TABLE IF NOT EXISTS test(
            id INT NOT NULL AUTO_INCREMENT,
            number INT,
            text VARCHAR(100),
            PRIMARY KEY (id)
        )
        """

query2 = """
        INSERT INTO test (number, text)
        VALUES (10, "Thingy")
        """

client.run(query)
client.run(query2)


In [29]:
# Release the connection; close_client() is a no-op if none is open.
client.close_client()

In [89]:
# Start over with a fresh wrapper (no connection is opened until first use).
client = MySQL()

In [None]:
# Run the statements stored in ./sql/create_table.sql, then release the connection.
client.run_multi('create_table.sql')
client.close_client()

In [61]:
# Run the statements stored in ./sql/load_data_test.sql, then release the connection.
client.run_multi('load_data_test.sql')
client.close_client()

In [62]:
# Sanity check: recover the label between the first underscore and the
# extension of a sample file name.
test = 'ALABAMA_fiber.csv'
idx = test.find('_')
os.path.splitext(test)[0][idx + 1:]

'fiber'

### Inserting Cable CSV data into MySQL table

In [None]:
# Load every CSV in ../FCC-DATA whose name does not end in '_fiber.csv' into
# the `cable` table, one file per LOAD DATA statement, in sorted name order.
d_path = os.path.abspath('../FCC-DATA')
directory = sorted(os.listdir(d_path))

for d in directory:
    # Only CSV exports can be loaded; ignore anything else in the folder.
    if not d.endswith('.csv'):
        continue

    # The part after the first underscore names the technology
    # (e.g. 'ALABAMA_fiber.csv' -> 'fiber').
    _, sep, kind = d[:-len('.csv')].partition('_')

    if not sep:
        # Unexpected file name: report it instead of crashing the whole run.
        print(f'{d} skipped (no "_" in file name)')
        continue

    if kind == 'fiber':
        continue

    # '\n' below is expanded by Python, so MySQL receives a literal newline
    # as the line terminator. IGNORE 1 ROWS skips the CSV header.
    query = f"""
        LOAD DATA LOCAL INFILE '{d_path}/{d}'
        INTO TABLE cable
        FIELDS TERMINATED BY ','
        ENCLOSED BY '"'
        LINES TERMINATED BY '\n'
        IGNORE 1 ROWS
        (provider_id, frn, brand_name, location_id, block_fips, h3index_hex8, technology_code, max_advertised_download_speed, max_advertised_upload_speed, low_latency, business_residential_code);
        """

    try:
        client.run(query)
    except Exception as exc:
        # Stop at the first failure so a partial load is noticed. Catching
        # Exception (not a bare except) lets Ctrl-C interrupt the loop.
        print(f'{d} error')
        print(repr(exc))
        break
    else:
        print(f'{d} complete')
    

### Inserting Fiber CSV data into MySQL table

In [None]:
# Load every CSV in ../FCC-DATA whose name does not end in '_cable.csv' into
# the `fiber` table, one file per LOAD DATA statement, in sorted name order.
d_path = os.path.abspath('../FCC-DATA')
directory = sorted(os.listdir(d_path))

for d in directory:
    # Only CSV exports can be loaded; ignore anything else in the folder.
    if not d.endswith('.csv'):
        continue

    # The part after the first underscore names the technology
    # (e.g. 'ALABAMA_fiber.csv' -> 'fiber').
    _, sep, kind = d[:-len('.csv')].partition('_')

    if not sep:
        # Unexpected file name: report it instead of crashing the whole run.
        print(f'{d} skipped (no "_" in file name)')
        continue

    if kind == 'cable':
        continue

    # '\n' below is expanded by Python, so MySQL receives a literal newline
    # as the line terminator. IGNORE 1 ROWS skips the CSV header.
    query = f"""
        LOAD DATA LOCAL INFILE '{d_path}/{d}'
        INTO TABLE fiber
        FIELDS TERMINATED BY ','
        ENCLOSED BY '"'
        LINES TERMINATED BY '\n'
        IGNORE 1 ROWS
        (provider_id, frn, brand_name, location_id, block_fips, h3index_hex8, technology_code, max_advertised_download_speed, max_advertised_upload_speed, low_latency, business_residential_code);
        """

    try:
        client.run(query)
    except Exception as exc:
        # Stop at the first failure so a partial load is noticed. Catching
        # Exception (not a bare except) lets Ctrl-C interrupt the loop.
        print(f'{d} error')
        print(repr(exc))
        break
    else:
        print(f'{d} complete')

In [78]:
# Create the Geography table. Its non-id columns are the ones named, in the
# same order, by the LOAD DATA statement that fills this table.
# NOTE: plain CREATE TABLE is not idempotent - re-running this cell against a
# database that already has the table makes client.run() raise.
make_table = """
    CREATE TABLE Geography(
        id INT NOT NULL AUTO_INCREMENT,
        geography_type VARCHAR(100),
        geography_id INT,
        geography_desc VARCHAR(100),
        geography_desc_full VARCHAR(100),
        data_type VARCHAR(100),
        provider_id INT,
        res_st_pct DECIMAL(5,4),
        bus_iv_pct DECIMAL(5,4),
        PRIMARY KEY (id)
    )
    """

client.run(make_table)

In [79]:
# Bulk-load the provider-summary-by-geography CSV into the Geography table.
f_path = os.path.abspath('../GEO-DATA/bdc_us_provider_summary_by_geography_063022.csv')

# The string is not raw, so Python expands '\n' and MySQL receives a literal
# newline as the line terminator. IGNORE 1 ROWS skips the CSV header, and the
# column list leaves the AUTO_INCREMENT id to MySQL.
# LOCAL INFILE relies on allow_local_infile=True set in MySQL.connect_client();
# presumably the server must permit it as well - verify.
query = f"""
    LOAD DATA LOCAL INFILE '{f_path}'
    INTO TABLE Geography
    FIELDS TERMINATED BY ','
    ENCLOSED BY '"'
    LINES TERMINATED BY '\n'
    IGNORE 1 ROWS
    (geography_type, geography_id, geography_desc, geography_desc_full, data_type, provider_id, res_st_pct, bus_iv_pct);
    """

client.run(query)

In [82]:
# Create the ri_cable table. Its non-id columns match the column list used by
# the cable/fiber LOAD DATA statements in this notebook.
# NOTE: plain CREATE TABLE is not idempotent - re-running this cell against a
# database that already has the table makes client.run() raise.
ri_table = """
    CREATE TABLE ri_cable(
    id INT NOT NULL AUTO_INCREMENT,
    provider_id INT,
    frn VARCHAR(15),
    brand_name VARCHAR(100),
    location_id VARCHAR(100),
    block_fips VARCHAR(20),
    h3index_hex8 VARCHAR(100),
    technology_code INT,
    max_advertised_download_speed INT,
    max_advertised_upload_speed INT,
    low_latency INT,
    business_residential_code VARCHAR(5),
    PRIMARY KEY (id)
);
    """

client.run(ri_table)

In [84]:
# Run the statements stored in ./sql/load_data_test.sql (connection is left open).
client.run_multi('load_data_test.sql')

In [None]:
# NOTE(review): the statement below is an unfinished placeholder - it is not
# valid SQL and cannot yield (provider_id, brand_name) rows. Replace it with
# the intended SELECT before running this cell.
query = """
    CREATE
    """

cursor = client.get_cursor(query)

# get_cursor() returns None when the query fails (it has already printed the
# error and closed the connection), so only iterate over a real cursor.
if cursor is not None:
    try:
        for provider_id, brand_name in cursor:
            print(f'{provider_id} {brand_name}')
    finally:
        # Always release the cursor, even if iteration fails part-way.
        cursor.close()

In [53]:
import h3.api.numpy_int as h3
import requests
from IPython.display import clear_output
import time
from pprint import pprint

class LocationData(MySQL):
    """Fills the ``location`` table with a city/town and state per H3 cell.

    Distinct ``h3index_hex8`` values are read from ``cable``, each cell is
    converted to coordinates with ``h3``, reverse-geocoded through the Census
    geocoder, and written to ``location`` with ``INSERT IGNORE``.
    """

    # Endpoint queried by get_loc_data().
    GEOCODER_URL = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    def get_data(self, offset=2000, limit=8000, total=1086137):
        """Geocode one page of distinct H3 cells and store the results.

        Progress is printed (and the cell output cleared) for every row, and
        each insert is committed immediately. Any exception is printed and
        ends the run; the connection is always closed before returning.

        Args:
            offset: number of distinct cells to skip; also the starting
                value of the printed progress counter.
            limit: maximum number of cells to process in this call.
            total: overall cell count shown in the progress line only.

        Returns:
            None.
        """
        cli = self.connect_client()

        # NOTE(review): there is no ORDER BY, so which rows a given
        # LIMIT/OFFSET page contains is not guaranteed to be stable between
        # runs - confirm this is acceptable.
        query = """
            SELECT DISTINCT h3index_hex8
            FROM cable
            LIMIT %s
            OFFSET %s;
            """

        # Placeholders let the connector quote the values, so place names
        # containing an apostrophe no longer break the statement.
        add_data = """
            INSERT IGNORE INTO location (hex_num, city_town, state)
            VALUES(%s, %s, %s);
            """

        try:
            # Buffered so the SELECT result is fully fetched up front and the
            # second cursor can run INSERTs on the same connection.
            c1 = cli.cursor(buffered = True)
            c2 = cli.cursor(buffered = True)

            print('Loading data..')
            c1.execute(query, (int(limit), int(offset)))

            print('Done')
            # Presumably leaves 'Done' visible before the first clear_output().
            time.sleep(2)

            start = time.time()

            for count, (h,) in enumerate(c1, start=offset):
                dur = time.time() - start
                clear_output(wait = True)

                print(f'Working {count} / {total:,}')
                print(time.strftime('%H:%M:%S', time.gmtime(dur)))

                (lat, lon) = h3.h3_to_geo(h3.string_to_h3(h))
                (city, state) = self.get_loc_data(lat, lon)

                c2.execute(add_data, (h, city, state))
                cli.commit()

        except Exception as e:
            # Best effort: report the failure and stop. Rows already
            # committed stay in place.
            print(e)
        else:
            c1.close()
            c2.close()
        finally:
            self.close_client()

    def get_loc_data(self, lat, lon, timeout=30):
        """Reverse-geocode a coordinate with the Census geocoder.

        Args:
            lat: latitude, sent as the ``y`` query parameter.
            lon: longitude, sent as the ``x`` query parameter.
            timeout: seconds to wait for the HTTP request before giving up.

        Returns:
            A ``(city, state)`` tuple: the ``NAME`` of the first
            'County Subdivisions' entry and the ``STUSAB`` of the first
            'States' entry in the response.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-success HTTP status.
            KeyError, IndexError: if the response lacks the expected
                geography entries.
        """
        params = {
            'x': lon,
            'y': lat,
            'benchmark': 'Public_AR_Current',
            'vintage': 'Current_Current',
            'format': 'json'
        }

        # Without a timeout a stalled request would block the whole run.
        response = requests.get(self.GEOCODER_URL, params=params, timeout=timeout)
        response.raise_for_status()

        geographies = response.json()['result']['geographies']
        state = geographies['States'][0]['STUSAB']
        city = geographies['County Subdivisions'][0]['NAME']

        return (city, state)


In [54]:
# Run the geocoding pass. get_data() prints its own progress and closes the
# database connection itself, whether it finishes or hits an error.
ld = LocationData()
ld.get_data()

Working 910 / 1,086,137
00:05:10
