In [9]:
import os
import sqlite3

import pandas as pd
import plotly.io as pio
import seaborn as sns
from matplotlib import pyplot as plt
from plotly import express as px
from sklearn.linear_model import LinearRegression

pio.renderers.default="iframe"

In [10]:
conn = sqlite3.connect("schools.db") # opens a connection to schools.db; sqlite3 creates the file if it does not already exist

In [11]:
df_iter = pd.read_csv("schools.csv", chunksize = 1000) # returns an iterator that yields schools.csv as DataFrames of up to 1000 rows each

In [12]:
# Load the CSV into the "schools" table one chunk at a time.
# The first chunk replaces any existing table so that re-running the notebook
# does not append a second copy of every row; later chunks are appended to it.
for chunk_number, chunk in enumerate(df_iter):
    chunk.to_sql(
        "schools",
        conn,
        if_exists="replace" if chunk_number == 0 else "append",
        index=False,
    )

In [113]:
def query_schools_database(state, connection=None):
    """
    Return the address columns of every ``schools`` row belonging to one state.

    Selects statename, lea_name (aliased to "Name"), mstreet1, mzip and mcity
    (aliased to "city") from the ``schools`` table for rows whose statename
    equals ``state``.

    :state: the name of the state to be investigated; it is passed to the
        database as a bound parameter instead of being pasted into the SQL
        text, so quotes or column names inside it cannot change the query
    :connection: optional database connection to run the query on; when
        omitted, the notebook-level ``conn`` is used
    :returns: a pandas DataFrame with one row per matching record
    """

    cmd = """
    SELECT S.statename, S.lea_name "Name", S.mstreet1, S.mzip, S.mcity "city"
    FROM schools S
    WHERE S.statename = ?
    """

    # Fall back to the notebook's global connection so existing one-argument calls keep working.
    db = conn if connection is None else connection
    return pd.read_sql_query(cmd, db, params=(state,))

In [114]:
# Query the rows whose statename is "ALABAMA"; this rebinds `df` to the query result.
df = query_schools_database("ALABAMA")

In [115]:
# Display the query result.
df

Unnamed: 0,STATENAME,Name,MSTREET1,MZIP,city
0,ALABAMA,Alabama Youth Services,1000 Industrial School Road,36057,Mt Meigs
1,ALABAMA,Albertville City,8379 US Highway 431,35950,Albertville
2,ALABAMA,Marshall County,12380 US Highway 431 S,35976,Guntersville
3,ALABAMA,Hoover City,2810 Metropolitan Way,35243,Hoover
4,ALABAMA,Madison City,211 Celtic Dr,35758,Madison
...,...,...,...,...,...
152,ALABAMA,Winston County,PO Box 9,35553,Double Springs
153,ALABAMA,Orange Beach City,23908 Canal Road,36561,Orange Beach
154,ALABAMA,Empower Community School,2400 7th Avenue North,35020,Bessemer
155,ALABAMA,Covenant Academy of Mobile,4568 Halls Mill Road,36693,Mobile


In [16]:
import requests
import json
import urllib

In [17]:
# URL prefix of the geocoding endpoint; query parameters are appended after the "?".
base_url = "https://maps.googleapis.com/maps/api/geocode/json?"
# The API key is read from the environment so the secret never lives in the notebook.
# SECURITY: a literal key used to be stored in this cell; it should be revoked and reissued.
AUTH_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

In [None]:
import requests
import json
import urllib

In [None]:
# This cell repeats an earlier configuration cell.
# URL prefix of the geocoding endpoint; query parameters are appended after the "?".
base_url = "https://maps.googleapis.com/maps/api/geocode/json?"
# The API key is read from the environment so the secret never lives in the notebook.
# SECURITY: a literal key used to be stored in this cell; it should be revoked and reissued.
AUTH_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

In [18]:
from geopy.geocoders import Nominatim
# Nominatim geocoder client used by the lookups below.
# NOTE(review): "example app" looks like a placeholder user agent — Nominatim's usage
# policy asks for an application-specific value; confirm before running bulk lookups.
geolocator = Nominatim(user_agent = "example app")

In [None]:
# Try the geocoder on a single hard-coded address and display what it returns.
geolocator.geocode("1 Apple Park Way, Cupertino, CA")

In [None]:
def geocode_address(address):
    """
    Look up one address with the notebook-level ``geolocator``.

    :address: the address text handed to ``geolocator.geocode``
    :returns: a ``(latitude, longitude)`` tuple taken from the geocoder result,
        or ``(None, None)`` when the geocoder returns nothing truthy
    """
    match = geolocator.geocode(address)
    if not match:
        # No usable result for this address.
        return None, None
    return match.latitude, match.longitude

In [None]:
# Geocode every MSTREET1 value and store the result in two new columns.
# geocode_address returns a (latitude, longitude) tuple per row, so .apply yields a single
# Series of tuples; that cannot be assigned to two columns directly. Expanding it into a
# two-column DataFrame on the same index keeps each coordinate pair on its own row.
# NOTE(review): school_data is not defined in the visible cells — confirm where it comes from.
coordinates = pd.DataFrame(
    school_data['MSTREET1'].apply(geocode_address).tolist(),
    index=school_data.index,
    columns=['latitude', 'longitude'],
)
school_data[['latitude', 'longitude']] = coordinates
lot_long_df = school_data[['latitude','longitude']]

In [116]:
# Despite the name, this list is filled with address strings (the inputs to the geocoder), not coordinates.
district_lats_lons = []

In [27]:
# Append the MSTREET1 value of every row of df, looked up by index label, to district_lats_lons.
# NOTE(review): this cell carries an older execution count than its neighbours, and a later
# cell builds full "street, city, zip" strings under the same list name — confirm whether
# this cell is still meant to run.
district_lats_lons.extend(df['MSTREET1'][row_label] for row_label in df.index)

In [117]:
# Build one "street, city, zip" string per row of df for the geocoder.
# The list is rebuilt (not appended to) so that re-running this cell, or running it after
# other cells have already put entries in the list, always yields exactly one entry per row.
district_lats_lons = [
    f"{street}, {city}, {zip_code}"
    for street, city, zip_code in zip(df['MSTREET1'], df['city'], df['MZIP'])
]

In [118]:
# Display the formatted address strings.
district_lats_lons

['1000 Industrial School Road, Mt Meigs, 36057',
 '8379 US Highway 431, Albertville, 35950',
 '12380 US Highway 431 S, Guntersville, 35976',
 '2810 Metropolitan Way, Hoover, 35243',
 '211 Celtic Dr, Madison, 35758',
 'PO Box 698, Talladega, 35161',
 '1255 Dauphin St, Mobile, 36604',
 '1517 Hurst Avenue, Leeds, 35094',
 '126 Newt Parker Dr, Boaz, 35957',
 '476 Main Street, Trussville, 35173',
 '1800 Rev Abraham Woods Jr Blvd, Birmingham, 35203',
 '375 Lee Street, Alexander City, 35010',
 '1201 C C Baker Avenue, Andalusia, 36421',
 'PO Box 1500, Anniston, 36202',
 '750 Arabian Dr NE, Arab, 35016',
 '455 US Highway 31 North, Athens, 35611',
 'P O Box 220350, Deatsville, 36022',
 '101 Case Ave, Attalla, 35954',
 '4010 Lil Spartan Drive, Saraland, 36571',
 'P O Box 11493, Chickasaw, 36611',
 '220 Baker Road, Satsuma, 36572',
 '10111 Highway 119, Alabaster, 35007',
 '3160 Pelham Parkway, Pelham, 35124',
 '9045 Vaughn Road, Montgomery, 36117',
 '3725 AIRPORT BLVD, MOBILE, 36608',
 'PO Box 105

In [119]:
# Receives one [latitude, longitude] pair per address that geocodes successfully.
district_lats_lons_2 = []

In [120]:
# Receives the address strings for which geocoding returned nothing or raised.
failed_addresses = []

In [121]:
# Geocode every formatted address, collecting [latitude, longitude] pairs for successes
# and the address text for failures.
# Both result lists are rebuilt here so that re-running this cell cannot leave duplicate
# entries behind.
district_lats_lons_2 = []
failed_addresses = []
for add in district_lats_lons:
    try:
        location = geolocator.geocode(add)
    except Exception:
        # A failed request counts as a failed lookup. Catching Exception rather than using
        # a bare except lets KeyboardInterrupt still stop this long-running loop.
        failed_addresses.append(add)
        continue
    if location is not None:
        district_lats_lons_2.append([location.latitude, location.longitude])
    else:
        # The geocoder found no match for this address.
        failed_addresses.append(add)

In [122]:
# One row per successful lookup, in the order the addresses were geocoded;
# failed addresses have no row here.
df2 = pd.DataFrame(district_lats_lons_2, columns=['Latitude', 'Longitude'])

In [135]:
# Recover the street portion of each failed "street, city, zip" string.
# Splitting from the right (at most twice) keeps any ", " that occurs inside the street
# itself, so the extracted text still equals the street value the string was built from.
extracted_addresses = [add.rsplit(', ', 2)[0] for add in failed_addresses]

In [137]:
# Keep only the rows whose street text is not among the failed lookups.
# NOTE(review): rows are matched on MSTREET1 alone, so two rows sharing the same street text
# are both dropped if either one failed; the later index-based join with df2 assumes exactly
# one remaining row per successful lookup, in the same order — confirm the lengths agree.
df = df[~df['MSTREET1'].isin(extracted_addresses)]

In [138]:
# Renumber the remaining rows 0..n-1 so they can be matched against df2 by index.
# Rebinding to the returned frame (instead of inplace=True) avoids mutating the filtered
# frame in place.
df = df.reset_index(drop=True)

In [139]:
# Add the latitudes as a column; pandas aligns df2['Latitude'] to df by index label.
# NOTE(review): this is only correct if df2 has one row per row of df, in the same order — confirm.
df.loc[:, 'latitude'] = df2['Latitude']

In [140]:
# Add the longitudes as a column; pandas aligns df2['Longitude'] to df by index label.
# NOTE(review): this is only correct if df2 has one row per row of df, in the same order — confirm.
df.loc[:, 'longitude'] = df2['Longitude']

In [141]:
# Display the frame with the latitude and longitude columns attached.
df

Unnamed: 0,STATENAME,Name,MSTREET1,MZIP,city,latitude,longitude
0,ALABAMA,Hoover City,2810 Metropolitan Way,35243,Hoover,33.407615,-86.765705
1,ALABAMA,Madison City,211 Celtic Dr,35758,Madison,34.687180,-86.744949
2,ALABAMA,Al Sch Of Math And Science,1255 Dauphin St,36604,Mobile,30.687377,-88.064519
3,ALABAMA,Leeds City,1517 Hurst Avenue,35094,Leeds,33.549984,-86.534760
4,ALABAMA,Boaz City,126 Newt Parker Dr,35957,Boaz,34.212836,-86.159496
...,...,...,...,...,...,...,...
79,ALABAMA,Wilcox County,75 Camden Bypass,36726,Camden,32.003772,-87.289639
80,ALABAMA,Orange Beach City,23908 Canal Road,36561,Orange Beach,30.289412,-87.618389
81,ALABAMA,Empower Community School,2400 7th Avenue North,35020,Bessemer,33.411102,-86.953914
82,ALABAMA,Covenant Academy of Mobile,4568 Halls Mill Road,36693,Mobile,30.615858,-88.146102


In [144]:
# Scatter the rows of df on a "carto-positron" basemap, positioned by their
# latitude/longitude columns; hovering a marker shows its "Name" value.
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    hover_name='Name',
    mapbox_style='carto-positron',
    zoom=1,
)


In [145]:
# Render the map; the default plotly renderer was set to "iframe" in the first cell.
fig.show()