### Import libraries.

In [2]:
import configparser
from urllib.parse import quote

import matplotlib.pyplot as plt
import mysql.connector
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import requests
import seaborn as sns
import sqlalchemy

### Retrieve database authentication data and connect to database.

In [52]:
# Read the database credentials from the ini file and build the SQLAlchemy engine.
CONFIG_PATH = '../config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() returns the list of files it managed to parse and silently
# skips missing ones, so fail here with a clear message instead of a later KeyError.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Could not read database config file: {CONFIG_PATH}')

# Access database authentication data from the [mysql] section.
mysql_config = config['mysql']
host = mysql_config['host']
user = mysql_config['user']
password = mysql_config['password']
database = mysql_config['database']

# Create an SQLAlchemy engine object to authenticate the connection.
# The user name and password are percent-encoded so characters such as '@', ':'
# or '/' inside a credential cannot be mistaken for URL delimiters.
engine = sqlalchemy.create_engine(
    f"mysql+mysqlconnector://{quote(user, safe='')}:{quote(password, safe='')}@{host}/{database}"
)

### Top 10 countries by monthly mentorship price.

In [53]:
# Average monthly price per country, keeping only the ten highest averages.
# pd.read_sql opens and releases its own connection, so no result cursor is left
# open, and the cell no longer calls Engine.execute(), which SQLAlchemy 2.0 removed.
top10_countries_avg_price_df = pd.read_sql(
    """
    select country, avg(dollars_month)
    from mentors_profiles
    group by country
    order by avg(dollars_month) desc
    limit 10
    """,
    engine,
)

# The aggregate column keeps the name the database gives it ('avg(dollars_month)');
# `labels` only changes how the axes are titled in the figure.
fig = px.bar(
    top10_countries_avg_price_df,
    x='country',
    y='avg(dollars_month)',
    title='Top 10 Countries by Average Monthly Mentorship Price',
    labels={'country': 'Country', 'avg(dollars_month)': 'Average Dollars per Month'},
)

fig.show()

### Relationship between services offered and price.

In [79]:
# Columns that hold the (optional) services a mentor offers.
SERVICE_COLUMNS = ['service_1', 'service_2', 'service_3', 'service_4']

# Retrieve every mentor's service slots together with their monthly price.
# pd.read_sql manages its own connection, so no result cursor is left open.
services_price_raw_df = pd.read_sql(
    """
    select service_1, service_2, service_3, service_4, dollars_month
    from mentors_profiles;
    """,
    engine,
)

# Count the number of services offered by each mentor. Only the service columns
# are counted: counting non-nulls across the whole row would also count
# dollars_month and inflate every total by one.
# assumes an unused service slot is stored as NULL rather than '' — TODO confirm.
services_price_df = (
    services_price_raw_df
    .assign(services_count=services_price_raw_df[SERVICE_COLUMNS].notna().sum(axis=1))
    .drop(columns=SERVICE_COLUMNS)
)

# Rows without a price cannot take part in the regression; cast the price to
# float so the fit works on a plain numeric column.
fit_df = (
    services_price_df
    .dropna(subset=['dollars_month'])
    .astype({'dollars_month': float})
)
assert not fit_df.empty, 'no mentor rows with a price to fit'

# Least-squares straight line: dollars_month = slope * services_count + intercept.
slope, intercept = np.polyfit(fit_df['services_count'], fit_df['dollars_month'], 1)

# Spread of the observed prices around the fitted line. ddof=2 because two
# parameters (slope and intercept) were estimated from the data.
residuals = fit_df['dollars_month'] - (slope * fit_df['services_count'] + intercept)
residual_std = residuals.std(ddof=2)

# Evaluate the fitted line and its +/- 1 residual SD band over the observed range
# of service counts (instead of a hard-coded upper bound).
x_vals = np.linspace(fit_df['services_count'].min(), fit_df['services_count'].max(), 100)
y_vals = slope * x_vals + intercept
df_fit = pd.DataFrame({
    'services_count': x_vals,
    'dollars_month': y_vals,
    'error_upper': y_vals + residual_std,
    'error_lower': y_vals - residual_std,
})

# Create the plot.
fig = go.Figure()

# Shaded band: draw the lower bound invisibly first, then the upper bound with
# fill='tonexty' so the fill covers the area between the two bounds. (With no
# preceding trace, 'tonexty' would fill down to zero instead.)
fig.add_trace(go.Scatter(x=df_fit['services_count'], y=df_fit['error_lower'],
                         mode='lines', line=dict(width=0),
                         showlegend=False, hoverinfo='skip'))
fig.add_trace(go.Scatter(x=df_fit['services_count'], y=df_fit['error_upper'],
                         mode='lines', line=dict(width=0),
                         fill='tonexty', fillcolor='rgba(0,0,255,0.2)',
                         name='Fit +/- 1 residual SD', hoverinfo='skip'))

# Line of best fit, drawn last so it sits on top of the band.
fig.add_trace(go.Scatter(x=df_fit['services_count'], y=df_fit['dollars_month'],
                         mode='lines', name='Line of Best Fit',
                         line=dict(color='blue', width=2),
                         hovertemplate='Services Count: %{x}<br>Dollars per Month: %{y}'))

# Add the regression equation to the plot; the sign is rendered explicitly so a
# negative intercept does not show up as "+ -12.34".
intercept_sign = '+' if intercept >= 0 else '-'
equation = f'y = {slope:.2f}x {intercept_sign} {abs(intercept):.2f}'
fig.update_layout(title='Monthly Price vs. Number of Services Offered',
                  xaxis_title='Services Count',
                  yaxis_title='Dollars per Month',
                  xaxis=dict(range=[0, len(SERVICE_COLUMNS) + 1]),
                  width=600,
                  annotations=[dict(text=equation, x=0.05, y=0.95, showarrow=False,
                                     font=dict(size=16, color='black'),
                                     xref='paper', yref='paper', align='left')])

# Show the plot.
fig.show()

### How does the relationship between number of services offered and price vary by country?

In [None]:
# Relationship between ratings and dollars per month charged.
# NOTE(review): the markdown heading directly above this cell is about services
# by country, while this cell looks at rating vs. price — confirm which is intended.
rating_price_df = pd.read_sql(
    """
    select rating, dollars_month
    from mentors_profiles;
    """,
    engine,
)

# Build the figure from this cell's own data so the cell does not depend on (and
# re-display) a `fig` left over from an earlier cell.
fig = px.scatter(
    rating_price_df,
    x='rating',
    y='dollars_month',
    title='Mentor Rating vs. Monthly Price',
    labels={'rating': 'Rating', 'dollars_month': 'Dollars per Month'},
)

fig

### Which industries charged the highest mentorship prices?

### Percentage of mentors from large tech companies (FAANG, etc.). Is there a relationship between company and price?

### Which industries received the highest ratings?

### Which industries received the highest ratings?

In [2]:
# NOTE(review): these imports sit at the end of the notebook rather than in the
# import cell at the top, and numpy is already imported there as `np`.
# The recorded output of this cell is "ModuleNotFoundError: No module named 'simpy'",
# so simpy has to be installed in the kernel's environment before this cell can run.
# No cell visible in this notebook uses `random` or `simpy` — TODO confirm whether
# this cell is still needed.
import random
import numpy as np
import simpy

ModuleNotFoundError: No module named 'simpy'