In [279]:
import dask
import dask.dataframe as dd
import pandas as pd
from pathlib import Path 

import numpy as np
import scipy.stats as sps
from sklearn import linear_model

In [14]:
PATH_MAVEN = Path("/home/marek")
PATH_NGI = Path(PATH_MAVEN) / "maven" / "data" / "sci" / "ngi"
PATH_NGI_L2 = Path(PATH_NGI) / "l2"

In [283]:
test_dir = Path(PATH_NGI_L2, "2019/04/*.csv")

In [284]:
ddf = dd.read_csv(test_dir)

In [285]:
def IO_orb(orbdata,io='I'):
    minalt = orbdata['alt'].min()
    peri_t = orbdata[orbdata['alt']==minalt]['t_unix'].unique()
    #if len(peri_t)>1:
    #    sys.exit('Non-unique time found at periapse '+str(orbdata['orbit'].unique()))
    #else:
    if io == 'I':
        return orbdata[orbdata['t_unix']<=peri_t[0]]
    elif io =='O':
        return orbdata[orbdata['t_unix']>peri_t[0]]
    else:
        return orbdata

In [286]:
ddf = ddf[["orbit", "alt", "species", "abundance", "t_unix"]]
ddf = ddf[ddf["abundance"] > 0.]
ddf = ddf[ddf["alt"] < 250]

In [287]:
ddf = ddf.map_partitions(IO_orb)

In [288]:
ddf.head()

Unnamed: 0,orbit,alt,species,abundance,t_unix
141,8821,249.8525,Ar,2655.67213,1554084000.0
142,8821,248.7934,Ar,4035.615324,1554084000.0
143,8821,247.7385,Ar,5623.915851,1554084000.0
144,8821,246.6881,Ar,5222.673311,1554084000.0
145,8821,245.6419,Ar,7218.744247,1554084000.0


In [289]:
orbs = ddf["orbit"].unique().compute().tolist()

In [290]:
def df_from_orb_range(daskdf, orbit, orb_step=5):
    look_for_orbs = list(range(orbit - orb_step, orbit + orb_step))
    temp_ddf = daskdf[daskdf["orbit"].isin(look_for_orbs)]
    temp_df = temp_ddf.compute()
    return temp_df

def pivot_df(df):
    temp_df = df.pivot_table(values=["abundance"], index=["orbit","alt", "species"]).unstack()
    return temp_df

def make_ratio_col(df):
    df["N2/Ar"] = df["abundance"]["N2"] / df["abundance"]["Ar"]
    return df

def fit_ratio_alt(df):
    x = df[["alt"]]
    y = np.log(df["N2/Ar"])
    lr = linear_model.LinearRegression()
    lr.fit(x, y)
    return lr

def hp_from_fit(ratio, slope, intercept):
    return (np.log(ratio)-intercept)/slope

In [291]:
for orb in orbs:
    temp_df = df_from_orb_range(ddf, orb, orb_step=5)
    norbs = len(temp_df["orbit"].unique())
    temp_df = pivot_df(temp_df)
    temp_df = make_ratio_col(temp_df)
    final_df = temp_df["N2/Ar"].reset_index().dropna(subset=["alt", "N2/Ar"])
    fit = fit_ratio_alt(final_df)
    hp = hp_from_fit(1.25, float(fit.coef_),fit.intercept_)
    print(orb, hp, norbs, float(fit.coef_), fit.intercept_)

8821 113.29298072356356 5 0.028770023253179646 -3.036298138524747
8822 112.44496088253567 6 0.028404725678392327 -2.970824716471772
8823 112.54641040177916 7 0.028427798527163893 -2.976303128543071
8824 112.3957054374894 8 0.0282876882148322 -2.9562711207876102
8825 112.9993835096083 9 0.028648986707632364 -3.01417428482321
8826 113.03617273682212 10 0.02849736439853901 -2.998089453383211
8827 111.79872044508122 10 0.02810250753539504 -2.9186808324412086
8828 112.05840466743292 10 0.02844542429781979 -2.9644053155877095
8829 110.71789012044749 10 0.028488586828473175 -2.931052674847511
8830 110.27634862574082 10 0.028119649135694404 -2.877788680007138
8831 109.94655231105604 10 0.0277989581198509 -2.833256051802833
8832 110.28952867254918 10 0.02775403060581697 -2.8378354029648483
8833 109.98709427377761 10 0.027502065741478645 -2.8017287461174316
8834 110.11349282336653 10 0.02799952242287911 -2.8599816600551793
8835 109.22469792322494 10 0.02773424307769498 -2.806120770976317
8836 10