# Imports

In [56]:
import pandas as pd
import numpy as np
from pathlib import Path

from glob import glob
import os

# Absolute path two directory levels above the current working directory;
# every data/image location below is built from it.
PATH = Path('..', '..').resolve()

# Organizing ALFALFA and MPA-JHU SDSS catalogs

In [57]:
# Load the two a.40 tables; both are expected to carry an `AGCNr` column,
# which is the join key used below.
df = pd.read_csv(f'{PATH}/data/a40.datafile3.csv')

# `logMsun` is renamed to `logMHI`, the name the rest of the notebook uses.
df2 = pd.read_csv(f'{PATH}/data/a40.datafile1.csv').rename(columns={'logMsun': 'logMHI'})

# Integer identifiers parsed from the image file names (`<number>.jpg`); they are
# matched against `AGCNr` below.
# `Path(fn).stem` removes exactly the file extension. The previous
# `str.strip('.jpg')` strips the *character set* {'.', 'j', 'p', 'g'} from both
# ends of the name, which only happens to work for purely numeric stems.
ids = np.array([Path(fn).stem for fn in glob(f'{PATH}/images-OC/*.jpg')], dtype=int)

# Left-join datafile1 onto datafile3 by AGCNr; columns present in both tables get
# the `_sdss` suffix on the datafile1 side.
a40 = df.join(df2.set_index('AGCNr'), on='AGCNr', rsuffix='_sdss')

# Keep only rows that (a) have an image on disk, (b) have OCcode 'I'
# (NOTE(review): presumably the optical-counterpart quality code -- confirm against
# the catalog documentation), and (c) have a non-missing logMHI. Then expose the
# photometric ID under the name used by the SDSS table and keep the three columns
# needed downstream.
a40 = (
    a40.loc[a40.AGCNr.isin(ids) & (a40.OCcode == 'I') & a40.logMHI.notna()]
       .rename(columns={'PhotoObjID': 'DR7ObjID'})
       .loc[:, ['AGCNr', 'DR7ObjID', 'logMHI']]
       .copy()
)

In [24]:
# Build the two-column (DR7ObjID, logMstar) lookup table from the SDSS CSV.
#  * `lgm_tot_p50` is exposed as `logMstar`;
#  * `keep=False` discards *every* row whose DR7ObjID occurs more than once,
#    so only unambiguous IDs survive;
#  * rows with a missing or non-positive logMstar are removed.
sdss = (
    pd.read_csv(f'{PATH}/data/SDSS_basic_data.csv')
      .rename(columns={'lgm_tot_p50': 'logMstar'})
      .drop_duplicates(subset='DR7ObjID', keep=False)
      .loc[
          lambda t: (t['logMstar'] > 0) & t['logMstar'].notna(),
          ['DR7ObjID', 'logMstar'],
      ]
      .copy()
)

# ALFALFA 40% catalogs

## a.40A

In [80]:
# a.40A: start from columns 1 and 2 of the gas-fraction CSV (one of them is AGCNr,
# which the join below needs; the other is presumably `logfgas`, used further
# down -- verify against the file) and attach logMHI / logMstar to each row.
#
# The right-hand table is built by matching a40 to sdss on DR7ObjID and then
# re-indexing the result by AGCNr so it can be looked up from the CSV's AGCNr
# column. Both joins use pandas' default left join.
a40A = pd.read_csv(f'{PATH}/data/a40-SDSS_gas-frac.csv', usecols=[1, 2]).join(
    a40.set_index('DR7ObjID').join(sdss.set_index('DR7ObjID')).set_index('AGCNr'),
    on='AGCNr',
)

In [85]:
# a.40A logMHI: median, then the 16th/84th-percentile offsets from that median.
display(a40A['logMHI'].quantile(0.50))
display(a40A['logMHI'].quantile([0.16, 0.84]).sub(a40A['logMHI'].quantile(0.50)))

9.71

0.16   -0.50
0.84    0.35
Name: logMHI, dtype: float64

In [86]:
# a.40A logMstar: median, then the 16th/84th-percentile offsets from that median.
display(a40A['logMstar'].quantile(0.50))
display(a40A['logMstar'].quantile([0.16, 0.84]).sub(a40A['logMstar'].quantile(0.50)))

9.581133

0.16   -0.853462
0.84    0.847903
Name: logMstar, dtype: float64

In [87]:
# a.40A logfgas: median, then the 16th/84th-percentile offsets from that median.
display(a40A['logfgas'].quantile(0.50))
display(a40A['logfgas'].quantile([0.16, 0.84]).sub(a40A['logfgas'].quantile(0.50)))

0.12480099999999796

0.16   -0.675185
0.84    0.550076
Name: logfgas, dtype: float64

In [92]:
# Number of a.40A rows that received a logMstar value from the SDSS match.
a40A['logMstar'].notna().sum()

7399

## a.40B

In [90]:
# a.40B: read four columns (by position) from the galaxy-properties CSV and index
# the frame by AGCNr; per the counts shown below, the remaining columns are
# logMHI, logMstar and logfgas.
a40B = pd.read_csv(
    f'{PATH}/data/a40-SDSS_galaxy-properties.csv',
    usecols=[1, 3, 4, 7],
    index_col='AGCNr',
)

In [93]:
# Per-column count of non-missing values in a.40B.
a40B.notna().sum()

logMHI      4797
logMstar    4797
logfgas     4797
dtype: int64

In [94]:
# a.40B logMHI: median, then the 16th/84th-percentile offsets from that median.
display(a40B['logMHI'].quantile(0.50))
display(a40B['logMHI'].quantile([0.16, 0.84]).sub(a40B['logMHI'].quantile(0.50)))

9.68

0.16   -0.48
0.84    0.33
Name: logMHI, dtype: float64

In [95]:
# a.40B logMstar: median, then the 16th/84th-percentile offsets from that median.
display(a40B['logMstar'].quantile(0.50))
display(a40B['logMstar'].quantile([0.16, 0.84]).sub(a40B['logMstar'].quantile(0.50)))

9.420522

0.16   -0.682286
0.84    0.659805
Name: logMstar, dtype: float64

In [96]:
# a.40B logfgas: median, then the 16th/84th-percentile offsets from that median.
display(a40B['logfgas'].quantile(0.50))
display(a40B['logfgas'].quantile([0.16, 0.84]).sub(a40B['logfgas'].quantile(0.50)))

0.2394829999999999

0.16   -0.523041
0.84    0.450642
Name: logfgas, dtype: float64