In [None]:
# NOTES FROM MEETING
# No cell should take more than 20 minutes to run
# Please limit to one temporary collection (must be deleted at end of notebook). No external files
# Try to reduce single-line cells

# Mini-competition -
# Challenge: Limit to 50 lines of code (75 for team 1)! (excluding imports)
# Fit Success Rate: Fitting method with highest fit success rate
# Fit Average error: Fitting method with lowest fit error rate
# Fit Time complexity: Fitting method with lowest time expended
# Notebook housekeeping: Cleanest-looking notebook wins!

# Sigmoid Criteria Curve Fitting: Algebraic Approach
**Contributors:** Justin Kaufman, Marco Scialanga

**Achievement:** Curve fitting with scipy. Curves used: x / sqrt(1 + x^2) (algebraic sigmoid, with scale, shift and offset parameters) and the generalized logistic function.

**Requirements:** 


## Example of cell markdown
**This is a note:** Please have markdown before each cell. In this format!

In [None]:
# Import packages you need
import sys
sys.path.append('../..')
import numpy as np
import matplotlib.pyplot as plt
from pymongo import MongoClient
import math
from scipy.optimize import curve_fit
from exploration.config import mongo_inst
import pandas as pd

In [None]:
# Database handles shared by the cells below.
# `db` goes through a direct local connection; `osu_db` goes through the
# project's pre-configured `mongo_inst`. Both select the "osu_random_db" database.
# NOTE(review): presumably both handles reach the same server -- confirm, and
# consider keeping only one of them.
client = MongoClient(host='localhost', port=27017)
db = client["osu_random_db"]
osu_db = mongo_inst["osu_random_db"]

In [None]:
# Collect the _id of every document in beatmap_criteria_curve; these ids are
# the work list for all curve fits in this notebook.
cursor = db["beatmap_criteria_curve"].find({}, {"_id": 1})
Ids = [doc["_id"] for doc in cursor]

In [None]:
# Run all CDF curve fits & store in beatmap_criteria_curve collection
# NOTE(review): store_genLog and store_alg are defined in a cell further down
# this notebook, so that cell must be executed before this one; a fresh
# "Restart & Run All" raises NameError here until the cells are reordered.
store_genLog()
store_alg()

In [None]:
# Fit success-rate summary table: one row per fitted function, one column per
# difficulty bucket (1*-4*, 4*-6*, 6*+) plus an overall column.
# Takes around 3 minutes to run.
# NOTE(review): the fit_* helpers called here are defined in cells further
# down the notebook and must have been executed first.
collection = db["osu_beatmaps_attribs_modZero"]

# attrib_17 is the notebook's temporary collection. Drop any copy left behind
# by an interrupted run so that insert_many cannot fail on duplicate _ids.
db["attrib_17"].drop()
try:
    db.attrib_17.insert_many(
        collection.aggregate([{'$match':{"beatmap_id": {'$in': Ids}, "attrib_id": 17}}]))

    # Each helper publishes its success rates and average MSEs as globals.
    fit_lowDiff()
    fit_mediumDiff()
    fit_highDiff()
    fit_all()
finally:
    # Always remove the temporary collection, even when one of the fits raises.
    db["attrib_17"].drop()

dfSuccess = pd.DataFrame({
    "Function": ["Generalized Logistic", "Algebraic"],
    "SuccessRateLowDiff": [genLogSuccessLow, algSuccessLow],
    "SuccessRateMediumDiff": [genLogSuccessMedium, algSuccessMedium],
    "SuccessRateHighDiff": [genLogSuccessHigh, algSuccessHigh],
    "OverallSuccessRate": [genLogSuccessAll, algSuccessAll],
})
dfSuccess

In [None]:
# Average-MSE summary for the curves that could be fit, laid out like the
# success-rate table. The numbers were already computed by the fit_* helpers
# in the success-rate cell, so this cell only assembles the table.
dfMse = pd.DataFrame({
    "Function": ["Generalized Logistic", "Algebraic"],
    "AverageMseLowDiff": [genLogAverageMseLow, algAverageMseLow],
    "AverageMseMediumDiff": [genLogAverageMseMedium, algAverageMseMedium],
    "AverageMseHighDiff": [genLogAverageMseHigh, algAverageMseHigh],
    "OverallAverageMse": [genLogAverageMseAll, algAverageMseAll],
})
dfMse

## Conclusion
The algebraic (alg) function has a much higher fit success rate than the generalized logistic (genLog) function, probably because it has fewer parameters to fit. Thus, the slight advantage of genLog over alg in mean squared error does not justify using genLog instead of alg.

In [None]:
def store_genLog():
    """Fit the generalized logistic curve for every id in ``Ids`` and store it.

    For each id, the matching ``beatmap_criteria_curve`` document gets three
    fields under ``no_mod.mlpp.genLogistic``: ``success`` (bool), ``params``
    (list of fitted parameters, or None) and ``mse`` (mean squared error of
    the fit, or None).
    """
    for el in Ids:
        # Fit once and reuse the result for both the flag and the parameters.
        params = fit_genLog(el)
        success = params is not None
        # A failed fit has no error to report, so skip the refit in mse_genLog.
        mse = mse_genLog(el) if success else None

        db.beatmap_criteria_curve.update_one(
            {"_id" : el},
            {"$set": {"no_mod.mlpp.genLogistic.success" : success,
                      "no_mod.mlpp.genLogistic.params" : params,
                      "no_mod.mlpp.genLogistic.mse" : mse}})
def store_alg():
    """Fit the algebraic curve for every id in ``Ids`` and store the result.

    For each id, the matching ``beatmap_criteria_curve`` document gets three
    fields under ``no_mod.mlpp.algebraic``: ``success`` (bool), ``params``
    (list of fitted parameters, or None) and ``mse`` (mean squared error of
    the fit, or None).
    """
    for el in Ids:
        # Fit once and reuse the result for both the flag and the parameters.
        params = fit_alg(el)
        success = params is not None
        # A failed fit has no error to report, so skip the refit in mse_alg.
        mse = mse_alg(el) if success else None

        # The write must happen inside the loop so every id is stored,
        # not only the last one.
        db.beatmap_criteria_curve.update_one(
            {"_id" : el},
            {"$set": {"no_mod.mlpp.algebraic.success" : success,
                      "no_mod.mlpp.algebraic.params" : params,
                      "no_mod.mlpp.algebraic.mse" : mse}})

In [None]:
def makeArr(x):
    """Return ``np.arange`` counting from 0 up to, but not including, ``x``."""
    return np.arange(x)

In [None]:
def algFunc(x, A, x0, k, off):
    """Algebraic sigmoid ``A * u / sqrt(u**2 + 1) + off`` with ``u = k*x - x0``.

    ``x`` may be a scalar or a NumPy array; the remaining arguments are the
    amplitude, horizontal shift, horizontal scale and vertical offset.
    """
    u = k*x - x0
    return A * u / np.sqrt(u**2 + 1) + off

def genLogFunc(x, A, K, B, Q, v):
    """Generalized logistic (Richards) curve with the additive constant set to 1.

    f(x) = A + (K - A) / (1 + Q * exp(-B * x)) ** (1 / v)

    ``A`` and ``K`` are the lower and upper asymptotes, ``B`` the growth rate,
    ``Q`` sets the horizontal position and ``v`` the asymmetry of the curve.
    ``x`` may be a scalar or a NumPy array.
    """
    # The 1/v exponent applies to the whole denominator. Raising only the
    # exponential to 1/v reduces the model to a plain logistic in which just
    # the ratio B/v is identifiable, which makes the fit ill-conditioned.
    return A + (K - A) / (1 + Q*np.exp(-B*x))**(1/v)

In [None]:
def get_x_and_y(beatmap_id):
    """Return the ``(x, y)`` points of one beatmap's no-mod criteria curve.

    ``y`` is the element-wise ratio ``n_pass / total`` read from the
    ``no_mod`` entry of the beatmap's ``beatmap_criteria_curve`` document,
    with NaN entries removed. ``x`` is ``0 .. len(y) - 1``.

    Raises TypeError when no document with this ``_id`` exists, because
    ``find_one`` then returns None; the fit and MSE helpers catch that.
    """
    beatmap = osu_db['beatmap_criteria_curve'].find_one({'_id': beatmap_id})
    no_mod = beatmap['no_mod']

    # Bins with total == 0 yield NaN and are filtered out right below, so the
    # NumPy division warnings they trigger carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.asarray(no_mod['n_pass']) / np.asarray(no_mod['total'])
    y = ratio[~np.isnan(ratio)]
    # NOTE(review): x is re-indexed after the NaN entries are dropped, so a NaN
    # in the middle of the curve shifts every later point left -- confirm intended.
    x = makeArr(len(y))

    return x, y

In [None]:
def fit_alg(beatmap_id, maxfev=1000):
    """Fit ``algFunc`` to a beatmap's criteria curve.

    Parameters:
        beatmap_id: ``_id`` of the curve document to fit.
        maxfev: maximum number of function evaluations passed to ``curve_fit``.

    Returns:
        The fitted parameters ``[A, x0, k, off]`` as a list, or None when the
        data cannot be loaded or the optimizer fails.
    """
    try:
        # Load the data once; each get_x_and_y call is a database round-trip.
        x, y = get_x_and_y(beatmap_id)
        popt, _ = curve_fit(algFunc, x, y, maxfev = maxfev)
    except (RuntimeError, TypeError, ValueError):
        # RuntimeError: no convergence; TypeError: missing document or too few
        # points; ValueError: curve_fit rejected the data (e.g. empty arrays).
        return None
    return list(popt)
    
def fit_genLog(beatmap_id, maxfev=1000):
    """Fit ``genLogFunc`` to a beatmap's criteria curve.

    Parameters:
        beatmap_id: ``_id`` of the curve document to fit.
        maxfev: maximum number of function evaluations passed to ``curve_fit``.

    Returns:
        The fitted parameters ``[A, K, B, Q, v]`` as a list, or None when the
        data cannot be loaded or the optimizer fails.
    """
    try:
        # Load the data once; each get_x_and_y call is a database round-trip.
        x, y = get_x_and_y(beatmap_id)
        popt, _ = curve_fit(genLogFunc, x, y, maxfev = maxfev)
    except (RuntimeError, TypeError, ValueError):
        # RuntimeError: no convergence; TypeError: missing document or too few
        # points; ValueError: curve_fit rejected the data (e.g. empty arrays).
        return None
    return list(popt)

In [None]:
def plot_fit_alg(popt, beatmap_id):
    """Plot a beatmap's criteria curve with the fitted algebraic function on top.

    Parameters:
        popt: fitted ``algFunc`` parameters, as returned by ``fit_alg``.
        beatmap_id: ``_id`` of the curve document; also shown in the title.
    """
    # Load the data once; each get_x_and_y call is a database round-trip.
    x, y = get_x_and_y(beatmap_id)

    f, ax = plt.subplots(figsize = (14, 12))
    ax.set_title('Fitting algebraic function for beatmap %d' %(beatmap_id))
    ax.plot(x, y, label = 'original')
    # The legend entry names the algebraic fit, matching the title.
    ax.plot(x, algFunc(x, *popt), 'r-', label = 'Fitted algebraic function')
    ax.legend()
    
def plot_fit_genLog(popt, beatmap_id):
    """Plot a beatmap's criteria curve with the fitted generalized logistic on top.

    Parameters:
        popt: fitted ``genLogFunc`` parameters, as returned by ``fit_genLog``.
        beatmap_id: ``_id`` of the curve document; also shown in the title.
    """
    # Load the data once; each get_x_and_y call is a database round-trip.
    x, y = get_x_and_y(beatmap_id)

    f, ax = plt.subplots(figsize = (14, 12))
    ax.set_title('Fitting generalized logistic function for beatmap %d' %(beatmap_id))
    ax.plot(x, y, label = 'original')
    ax.plot(x, genLogFunc(x, *popt), 'r-', label = 'Fitted logistic function')
    ax.legend()

In [None]:
def mse_alg(beatmap_id, maxfev=1000):
    """Mean squared error of the ``algFunc`` fit for one beatmap.

    Parameters:
        beatmap_id: ``_id`` of the curve document to fit.
        maxfev: maximum number of function evaluations passed to ``curve_fit``.

    Returns:
        The mean of the squared residuals between the data and the fitted
        curve, or None when the data cannot be loaded or the fit fails.
    """
    try:
        # Load the data once; each get_x_and_y call is a database round-trip.
        x, y = get_x_and_y(beatmap_id)
        popt, _ = curve_fit(algFunc, x, y, maxfev = maxfev)
        return np.mean((y - algFunc(x, *popt))**2)
    except (RuntimeError, TypeError, ValueError):
        # Same failure modes as fit_alg: no convergence, missing document or
        # too few points, or data that curve_fit rejects.
        return None

def mse_genLog(beatmap_id, maxfev=1000):
    """Mean squared error of the ``genLogFunc`` fit for one beatmap.

    Parameters:
        beatmap_id: ``_id`` of the curve document to fit.
        maxfev: maximum number of function evaluations passed to ``curve_fit``.

    Returns:
        The mean of the squared residuals between the data and the fitted
        curve, or None when the data cannot be loaded or the fit fails.
    """
    try:
        # Load the data once; each get_x_and_y call is a database round-trip.
        x, y = get_x_and_y(beatmap_id)
        popt, _ = curve_fit(genLogFunc, x, y, maxfev = maxfev)
        return np.mean((y - genLogFunc(x, *popt))**2)
    except (RuntimeError, TypeError, ValueError):
        # Same failure modes as fit_genLog: no convergence, missing document
        # or too few points, or data that curve_fit rejects.
        return None

In [None]:
# Overlay both fitted curves on the raw criteria curve of one example beatmap.
beatmap_id = 104229
f, ax = plt.subplots(figsize = (14, 12))

# Load the data once; each get_x_and_y call is a database round-trip.
x, y = get_x_and_y(beatmap_id)

# A failed fit returns None; skip that curve instead of crashing on *None.
popt = fit_alg(beatmap_id)
if popt is not None:
    ax.plot(x, algFunc(x, *popt), label = 'Fitted algebraic function')

popt = fit_genLog(beatmap_id)
if popt is not None:
    ax.plot(x, genLogFunc(x, *popt), label = 'Fitted generalised logistic function')

ax.plot(x, y, label = 'original')
ax.set_title('Fitting functions for beatmap %d' %(beatmap_id))
ax.legend();

In [None]:
def genLogSuccess(idList):
    """Fraction of the ids in ``idList`` for which ``fit_genLog`` succeeds.

    Returns NaN for an empty ``idList``, since the rate is undefined there.
    """
    if len(idList) == 0:
        return float("nan")
    success = sum(1 for el in idList if fit_genLog(el))
    return success / len(idList)

def algSuccess(idList):
    """Fraction of the ids in ``idList`` for which ``fit_alg`` succeeds.

    Returns NaN for an empty ``idList``, since the rate is undefined there.
    """
    if len(idList) == 0:
        return float("nan")
    success = sum(1 for el in idList if fit_alg(el))
    return success / len(idList)

In [None]:
def genLogAverageMse(idList):
    """Average ``mse_genLog`` over the ids in ``idList`` that could be fitted.

    Ids whose fit fails (``mse_genLog`` returns None) are left out of the
    average. Returns NaN when no id could be fitted, including for an empty
    ``idList``.
    """
    errors = []
    for el in idList:
        # One fit per id; compare against None so an exact 0.0 MSE still counts.
        mse = mse_genLog(el)
        if mse is not None:
            errors.append(mse)
    if not errors:
        return float("nan")
    return sum(errors) / len(errors)

def algAverageMse(idList):
    """Average ``mse_alg`` over the ids in ``idList`` that could be fitted.

    Ids whose fit fails (``mse_alg`` returns None) are left out of the
    average. Returns NaN when no id could be fitted, including for an empty
    ``idList``.
    """
    errors = []
    for el in idList:
        # One fit per id; compare against None so an exact 0.0 MSE still counts.
        mse = mse_alg(el)
        if mse is not None:
            errors.append(mse)
    if not errors:
        return float("nan")
    return sum(errors) / len(errors)

In [None]:
def fit_all():
    """Compute success rates and average MSEs over every stored curve.

    Reads all ``_id`` values from ``beatmap_criteria_curve`` and publishes the
    results as the globals ``genLogSuccessAll``, ``algSuccessAll``,
    ``genLogAverageMseAll`` and ``algAverageMseAll``.
    """
    global genLogSuccessAll, algSuccessAll
    global genLogAverageMseAll, algAverageMseAll

    all_ids = [doc["_id"] for doc in db["beatmap_criteria_curve"].find({}, {"_id": 1})]

    genLogSuccessAll = genLogSuccess(all_ids)
    algSuccessAll = algSuccess(all_ids)
    genLogAverageMseAll = genLogAverageMse(all_ids)
    algAverageMseAll = algAverageMse(all_ids)

In [None]:
def fit_lowDiff():
    """Compute success rates and average MSEs for the low-difficulty bucket.

    Selects the ``beatmap_id`` of every ``attrib_17`` document whose ``value``
    lies in [1, 4) (the "1* - 4*" column of the summary tables) and publishes
    the results as the globals ``genLogSuccessLow``, ``algSuccessLow``,
    ``genLogAverageMseLow`` and ``algAverageMseLow``.
    """
    global genLogSuccessLow, algSuccessLow
    global genLogAverageMseLow, algAverageMseLow

    pipeline = [
        {"$match" : {"value" : {"$gte" : 1, "$lt": 4}}},
        {"$project" : {"beatmap_id" : "$beatmap_id"}}
    ]
    lowIds = [doc["beatmap_id"] for doc in db.attrib_17.aggregate(pipeline)]

    genLogSuccessLow = genLogSuccess(lowIds)
    algSuccessLow = algSuccess(lowIds)
    genLogAverageMseLow = genLogAverageMse(lowIds)
    algAverageMseLow = algAverageMse(lowIds)

In [None]:
def fit_mediumDiff():
    """Compute success rates and average MSEs for the medium-difficulty bucket.

    Selects the ``beatmap_id`` of every ``attrib_17`` document whose ``value``
    lies in [4, 6) (the "4* - 6*" column of the summary tables) and publishes
    the results as the globals ``genLogSuccessMedium``, ``algSuccessMedium``,
    ``genLogAverageMseMedium`` and ``algAverageMseMedium``.
    """
    global genLogSuccessMedium, algSuccessMedium
    global genLogAverageMseMedium, algAverageMseMedium

    pipeline = [
        {"$match" : {"value" : {"$gte" : 4, "$lt": 6}}},
        {"$project" : {"beatmap_id" : "$beatmap_id"}}
    ]
    mediumIds = [doc["beatmap_id"] for doc in db.attrib_17.aggregate(pipeline)]

    genLogSuccessMedium = genLogSuccess(mediumIds)
    algSuccessMedium = algSuccess(mediumIds)
    genLogAverageMseMedium = genLogAverageMse(mediumIds)
    algAverageMseMedium = algAverageMse(mediumIds)

In [None]:
def fit_highDiff():
    """Compute success rates and average MSEs for the high-difficulty bucket.

    Selects the ``beatmap_id`` of every ``attrib_17`` document whose ``value``
    is at least 6 (the "6* plus" column of the summary tables) and publishes
    the results as the globals ``genLogSuccessHigh``, ``algSuccessHigh``,
    ``genLogAverageMseHigh`` and ``algAverageMseHigh``.
    """
    global genLogSuccessHigh, algSuccessHigh
    global genLogAverageMseHigh, algAverageMseHigh

    pipeline = [
        {"$match" : {"value" : {"$gte" : 6}}},
        {"$project" : {"beatmap_id" : "$beatmap_id"}}
    ]
    highIds = [doc["beatmap_id"] for doc in db.attrib_17.aggregate(pipeline)]

    genLogSuccessHigh = genLogSuccess(highIds)
    algSuccessHigh = algSuccess(highIds)
    genLogAverageMseHigh = genLogAverageMse(highIds)
    algAverageMseHigh = algAverageMse(highIds)