In [1]:
import sqlite3

import geopandas as gpd
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from shapely.geometry import Point, LineString
from sklearn.linear_model import LinearRegression

<h1 align="left" style="font-size:20px"><b> General Methods </b></h1>

In [None]:
def get_col_vals(_df, _search_col, _tot_val):
    """Tabulate how often each value occurs in one column of ``_df``.

    Parameters
    ----------
    _df : DataFrame-like object indexable by column name.
    _search_col : name of the column whose values are counted.
    _tot_val : denominator used for the percentage; it is also written
        unchanged into the ``Total`` column. It is taken as given and is
        not derived from the number of rows.

    Returns
    -------
    pandas.DataFrame with the columns ``Answer``, ``Count``,
    ``Percentage`` (``100 * Count / _tot_val``) and ``Total``.
    """
    # Work on the raw values so the caller's index plays no role.
    answers = pd.DataFrame({'answer': _df[_search_col].values})
    summary = (
        answers
        .groupby('answer')
        .size()
        .reset_index()
        .rename(columns={"answer": "Answer", 0: "Count"})
    )
    share = 100 * (summary['Count'] / _tot_val)
    summary.insert(2, "Percentage", share, True)
    summary.insert(3, "Total", _tot_val, True)
    return summary

In [None]:
#https://docs.python.org/3/library/sqlite3.html
class Sqlite3Db:
    """Small convenience wrapper around a ``sqlite3`` connection and one cursor.

    ``conn`` holds the ``sqlite3`` connection and ``cursor`` a cursor created
    from it; both are ``None`` while no database is open.

    When used in a ``with`` statement the object yields the raw connection
    (``self.conn``), not the wrapper, and closes cursor and connection on
    exit. Nothing in this class commits; call ``commit()`` on the connection
    to persist changes before it is closed.
    """

    def __init__(self, name=None):
        """Create the wrapper and open ``name`` right away when it is given."""
        self.conn = None
        self.cursor = None

        if name:
            self.open(name)

    def open(self, name):
        """Connect to the database ``name`` and create a cursor on it.

        A ``sqlite3.Error`` raised while connecting is reported with
        ``print`` instead of being propagated.
        """
        try:
            self.conn = sqlite3.connect(name)
            self.cursor = self.conn.cursor()
        except sqlite3.Error as e:
            # Formatted (not concatenated) so a path-like ``name`` cannot
            # raise TypeError while the real error is being reported.
            print(f"Error connecting to database {name} with message:{e}")

    def close(self):
        """Close cursor and connection, if present, and reset both to ``None``.

        Safe to call more than once and on an object that never opened.
        """
        try:
            if self.cursor is not None:
                self.cursor.close()
        finally:
            # Always release the connection, even if closing the cursor failed.
            if self.conn is not None:
                self.conn.close()
            self.cursor = None
            self.conn = None

    def query(self, sqlStm, params=()):
        """Execute ``sqlStm`` on the wrapper's cursor and return that cursor.

        ``params`` is passed to ``cursor.execute`` as the values for the
        statement's placeholders, so values do not have to be formatted into
        the SQL text. The returned cursor can be used to fetch results.
        """
        return self.cursor.execute(sqlStm, params)

    # Using these magic methods (__enter__, __exit__) allows you to implement
    # objects which can be used easily with the with statement.
    def __enter__(self):
        """Return the underlying connection (``self.conn``) for the ``with`` body."""
        return self.conn

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database; exceptions from the ``with`` body propagate."""
        self.close()

In [None]:
def geolocate(country):
    """Geocode ``country`` with Nominatim and return its coordinates.

    Returns
    -------
    ``(latitude, longitude)`` taken from the geocoding result, or ``np.nan``
    when the lookup fails or yields no result.
    """
    geolocator = Nominatim(user_agent="bts_dsf")
    try:
        # Ask the geocoder for the location matching the given name
        loc = geolocator.geocode(country)
        if loc is None:
            # No match: report a missing value
            return np.nan
        # And return latitude and longitude
        return (loc.latitude, loc.longitude)
    except Exception:
        # Best-effort lookup: any geocoding failure becomes a missing value.
        # Catching ``Exception`` rather than using a bare ``except`` keeps
        # KeyboardInterrupt/SystemExit propagating, so a long batch of
        # lookups can still be interrupted.
        return np.nan

In [None]:
def build_geodf(df, lat_col_name='latitude', lon_col_name='longitude'):
    """Build a GeoDataFrame from a copy of ``df`` with point geometry.

    Parameters
    ----------
    df : table holding one latitude and one longitude column.
    lat_col_name : name of the latitude column (default ``'latitude'``).
    lon_col_name : name of the longitude column (default ``'longitude'``).

    Returns
    -------
    ``gpd.GeoDataFrame`` built from a copy of ``df``; the caller's object is
    not the one handed to the constructor.
    """
    df = df.copy()
    # Honour the column-name parameters instead of hard-coded names.
    lat = df[lat_col_name]
    lon = df[lon_col_name]
    # points_from_xy takes x (longitude) first, then y (latitude).
    return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(lon, lat))

In [7]:
def build_linestring(df):
    """Build a GeoDataFrame whose geometry is one line per row of ``df``.

    Each row contributes a ``LineString`` running from the point
    ``(source_lon, source_lat)`` to the point ``(dest_lon, dest_lat)``.
    ``df`` itself (no copy is made here) is passed to ``gpd.GeoDataFrame``
    together with those lines.
    """
    endpoints = zip(df.source_lon, df.source_lat, df.dest_lon, df.dest_lat)

    routes = []
    for src_lon, src_lat, dst_lon, dst_lat in endpoints:
        origin = Point((src_lon, src_lat))
        destination = Point((dst_lon, dst_lat))
        routes.append(LineString((origin, destination)))

    return gpd.GeoDataFrame(df, geometry=routes)

In [None]:
def predict_age(X_train, Y_train, X_test):
    """Fit a linear regression on the training data and predict for ``X_test``.

    The fitted intercept and coefficients are printed as a side effect.

    Parameters
    ----------
    X_train, Y_train : features and targets passed to ``LinearRegression.fit``.
    X_test : features passed to ``predict`` on the fitted model.

    Returns
    -------
    Whatever the fitted model's ``predict`` returns for ``X_test``.
    """
    regressor = LinearRegression()
    regressor.fit(X_train, Y_train)

    # Report the fitted parameters before predicting.
    fitted = (
        ('intercept:', regressor.intercept_),
        ('slope:', regressor.coef_),
    )
    for label, value in fitted:
        print(label, value)

    return regressor.predict(X_test)