In [None]:
import multiprocessing
import os
import time

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

# Root URL of the TNG web API; every request in this notebook targets a path under it.
baseUrl = 'http://www.tng-project.org/api/'

# Request headers carrying the API key.
# The key is taken from the TNG_API_KEY environment variable when it is set and non-empty.
# NOTE(review): the literal below is kept only so existing runs keep working; a credential
# committed to a notebook should be rotated and this fallback removed.
headers = {"api-key": os.environ.get("TNG_API_KEY") or "f3c97208f4981e63b57bb02c7135912b"}

def get(path, params=None):
    """Issue an HTTP GET with the notebook's API headers and decode JSON replies.

    Parameters
    ----------
    path : str
        URL to request.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.

    Returns
    -------
    object
        The decoded JSON body when the response media type is
        ``application/json``; otherwise the response object itself.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` when the server reports an error status.
    """
    # make HTTP GET request to path, sending the module-level `headers`
    r = requests.get(path, params=params, headers=headers)

    # raise exception if the response status signals an error
    r.raise_for_status()

    # Compare only the media type: the header may be missing altogether or may
    # carry parameters (e.g. "application/json; charset=utf-8"), and an exact
    # string comparison would mis-handle both cases.
    content_type = r.headers.get('content-type', '')
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type == 'application/json':
        return r.json()  # parse json responses automatically
    return r

# Request the API root with the key in `headers`; the result is kept in `r`.
# NOTE(review): `r` is not read again in the cells visible here — presumably a
# connectivity/authentication check; confirm before removing.
r = get(baseUrl)

In [None]:
# Wall-clock start, used below to report how long the listing request took.
tstart = time.time()

# Subhalo listing endpoint for TNG100-1, snapshot 25; the query string sets limit=10000 and offset=0.
url_z3 = "http://www.tng-project.org/api/TNG100-1/snapshots/25/subhalos/?limit=10000&offset=0"
# NOTE(review): a later cell indexes subhalos_z3['results'][i]['url'], so this call is
# expected to return parsed JSON with a 'results' list — confirm against the API.
subhalos_z3 = get(url_z3)
print("Elapsed Time: {:.2f}s".format(time.time() - tstart))

In [None]:
sub_z3 = [] ## list that func() appends one fetched subhalo record to per request
# Wall-clock start for the elapsed-time report printed after the download.
tstart = time.time()

# Number of entries of subhalos_z3['results'] that func() walks through.
len_sub = 10000

def func(sub):
    """Fetch the detail record of each listed subhalo and append it to ``sub``.

    Reads the module-level ``subhalos_z3['results']`` listing and requests the
    ``'url'`` of each entry through ``get``, one request per entry.

    Parameters
    ----------
    sub : list
        List that receives the fetched records; it is modified in place.

    Returns
    -------
    list
        The same ``sub`` object, after the records have been appended.
    """
    results = subhalos_z3['results']
    # Never index past what the listing actually returned: `len_sub` is a fixed
    # number and the listing may hold fewer entries than that.
    n_fetch = min(len_sub, len(results))
    for i in range(n_fetch):
        sub.append(get(results[i]['url']))
        # Report progress occasionally instead of printing one line per request.
        done = i + 1
        if done % 1000 == 0 or done == n_fetch:
            print("{}/{}".format(done, n_fetch))
    return sub

# Fill sub_z3 in place (func issues one get() call per listed subhalo), then
# report the wall-clock time elapsed since `tstart` was set.
func(sub_z3)
print("Elapsed Time: {:.2f}s".format(time.time() - tstart))

In [None]:
# Show every column when a frame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# One row per fetched subhalo record.
data_z3 = pd.DataFrame(sub_z3)

# Discard the columns that the later cells do not use. The labels are kept in
# their original order; 'prog_snap' appears twice, which is redundant but harmless.
data_z3 = data_z3.drop(
    columns=[
        'id', 'snap', 'bhmdot',
        'cm_x', 'cm_y', 'cm_z',
        'pos_x', 'pos_y', 'pos_z',
        'sfrinhalfrad', 'sfrinmaxrad', 'sfrinrad',
        'spin_x', 'spin_y', 'spin_z',
        'prog_snap', 'windmass', 'prog_snap', 'prog_sfid',
        'desc_snap', 'desc_sfid',
        'parent', 'grnr', 'primary_flag',
        'related', 'cutouts', 'trees', 'supplementary_data', 'vis', 'meta',
        'massinhalfrad', 'massinhalfrad_gas', 'massinhalfrad_dm',
        'massinhalfrad_stars', 'massinhalfrad_bhs',
        'massinmaxrad', 'massinmaxrad_gas', 'massinmaxrad_dm',
        'massinmaxrad_stars', 'massinmaxrad_bhs',
        'massinrad', 'gasmetallicitysfr', 'gasmetallicitysfrweighted',
        'gasmetallicityhalfrad', 'gasmetallicitymaxrad',
        'massinrad_gas', 'massinrad_dm', 'massinrad_stars', 'massinrad_bhs',
        'starmetallicityhalfrad', 'starmetallicitymaxrad',
        'stellarphotometricsrad',
        'vel_x', 'vel_y', 'vel_z',
    ]
)
data_z3

In [None]:
## Mass columns (units as noted by the original author; confirm against the TNG API docs)
Mtot = data_z3['mass'] ## total mass solar units
Mlog = data_z3['mass_log_msun'] ## log of total mass in solar units

## Stellar photometry, one Series per band (noted by the original author as magnitudes)
U = data_z3['stellarphotometrics_u']
B = data_z3['stellarphotometrics_b']
V = data_z3['stellarphotometrics_v']
G = data_z3['stellarphotometrics_g']
R = data_z3['stellarphotometrics_r']
I = data_z3['stellarphotometrics_i']
Z = data_z3['stellarphotometrics_z']

## log10 of the 'vmax' column (the column itself is noted by the original author as km/s)
vel = np.log10(data_z3['vmax'])

## Differences between neighbouring bands
iz = I - Z
ri = R - I
gr = G - R
ug = U - G

In [None]:
# Scatter of log10(vmax) against log10 total mass on a wide single-panel figure.
fig, ax4 = plt.subplots(ncols=1, figsize=(16, 4))

ax4.plot(vel, Mlog, '.', ms=4, c='b', label='vel vs Mlog', alpha=0.4)
ax4.legend(loc=2)
# `vel` holds log10 of vmax, so the axis is labelled as a logarithm. Only the
# symbols are set in math mode: wrapping whole words in $...$ drops the spaces.
ax4.set_xlabel(r'$\log_{10}(V_{\mathrm{max}})$ [km/s]')
ax4.set_ylabel(r'$\log_{10}(M)$ [$M_\odot$]');

In [None]:
# Scatter of log10(vmax) against log10 total mass with default figure size and markers.
# NOTE(review): the y axis is labelled as mass because `Mlog` is what is drawn. If a
# magnitude-versus-velocity plot was intended, replace `Mlog` with one of the
# photometric band Series and relabel the axis accordingly.
fig, ax0 = plt.subplots(ncols=1)

ax0.plot(vel, Mlog, '.')
ax0.set_ylabel(r'$\log_{10}(M)$ [$M_\odot$]')
# `vel` holds log10 of vmax, so the axis is labelled as a logarithm.
ax0.set_xlabel(r'$\log_{10}(V_{\mathrm{max}})$ [km/s]');

In [None]:
## Binary star-formation flag per row: 1 where 'sfr' is greater than 0, otherwise 0.
## The comparison is done on the whole column at once, so the result always has one
## entry per row of data_z3 instead of assuming a fixed row count. Missing values
## compare as False and therefore map to 0, as in an explicit `> 0` test.
sfr_bool = (data_z3['sfr'] > 0).astype(int).tolist()

In [None]:
# Remove the 'sfr' column now that the 0/1 flag has been derived from it.
# `columns=` is used because passing the axis positionally is rejected by pandas >= 2.0;
# errors='ignore' lets this cell be re-run after the column is already gone.
data_z3 = data_z3.drop(columns=['sfr'], errors='ignore')

In [None]:
from sklearn.model_selection import train_test_split

# Feature table: X is another name for the data_z3 frame (no copy is made here).
X = data_z3
# Target: the per-row 0/1 list built from the 'sfr' column.
y = sfr_bool

# random_state=0 is passed so the split uses a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance

## feature importance plot
def plot_feature_importances(model):
    """Draw a horizontal bar chart of a model's feature importances.

    One bar is drawn per column of the module-level ``X_test`` frame, and the
    bars are labelled with that frame's column names, so bar count and labels
    always come from the same object.

    Parameters
    ----------
    model : object
        Any object exposing a ``feature_importances_`` sequence with one value
        per column of ``X_test``.
    """
    feature_names = list(X_test.columns)
    n_features = len(feature_names)
    positions = np.arange(n_features)

    plt.barh(positions, model.feature_importances_, align='center')
    plt.yticks(positions, feature_names)
    plt.xlabel("Feature importance")
    plt.ylabel("Feature")
    # Leave one unit of padding below the first bar; the top edge sits at n_features.
    plt.ylim(-1, n_features)
    plt.gcf().set_size_inches(15, 10)
    
# NOTE(review): `model` is not assigned in any cell visible in this part of the
# notebook; unless it is defined elsewhere, this call raises NameError on a fresh
# kernel. It must be an object exposing `feature_importances_`.
plot_feature_importances(model)