In [None]:
# Reads in photometry from different sources, normalizes them, and puts them
# onto a BJD time scale

# Created 2021 Dec. 28 by E.S.

In [13]:
import numpy as np
import pandas as pd
from astropy.time import Time
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

%matplotlib qt

In [14]:
## USER INPUTS

photometry_source_file = "uy_cam_aavso_prepolished.txt"
bjd_file = "uy_cam_bjd.csv" # generated as-we-go
writeout_polished_file = "uy_cam_aavso_polished.txt"

In [15]:
stem_super = "/Users/bandari/Documents/git.repos/rrlyrae_metallicity/notebooks_for_development/"
stem_moore_macadam = stem_super + "polished_macadam_moore_photometry/"
stem_public_sources = stem_super + "all_photometry_program_stars/polished/"
bjd_sources = stem_super + "all_photometry_program_stars/bjds/"

In [16]:
# read in photometry

df = pd.read_csv(stem_public_sources + photometry_source_file)

In [17]:
# if there's a bunch of too-old stuff, remove it
#df = df.where(np.logical_and(df["JD"]>2456474-400,df["JD"]<2456474+400)).dropna(how="all").reset_index()
df = df.where(df["JD"]<=2459238.567486600).dropna(how="all").reset_index()

In [18]:
# write out only JD, for conversion

df["JD"].to_csv("junk_jds.csv", header=False, index=False)

# at this point, do conversion step manually online

In [None]:
# FYI: location of Wichita (approx. center of U.S.)

'''
lat_wichita = 37.688889
lon_wichita = -97.336111
'''

# read bjds back in and tack onto the dataframe

In [19]:
intermed_bjd = pd.read_csv(bjd_sources + bjd_file, names=["BJD"])
print(bjd_sources + bjd_file)

df["BJD"] = intermed_bjd.values

#df = df.join(intermed_bjd["BJD"])

/Users/bandari/Documents/git.repos/rrlyrae_metallicity/notebooks_for_development/all_photometry_program_stars/bjds/uy_cam_bjd.csv


In [20]:
len(df)

7419

In [10]:
test

Unnamed: 0,2456102.797595744
0,2.456103e+06
1,2.456103e+06
2,2.456103e+06
3,2.456103e+06
4,2.456103e+06
...,...
4866,2.456808e+06
4867,2.456808e+06
4868,2.456808e+06
4869,2.456808e+06


# look at the data, choose what we want

In [21]:
plt.clf()
plt.scatter(df["BJD"],
            df["Magnitude"])
plt.show()

In [22]:
plt.clf()
plt.scatter(df["BJD"].where(df["Band"] == "V"),
            df["Magnitude"].where(df["Band"] == "V"), color="k")
#plt.scatter(df["BJD"].where(df["Band"] == "V"),
#            df["Magnitude"].where(df["Band"] == "V"), color="b")
plt.show()

In [18]:
plt.clf()
plt.scatter(df["BJD"].where(df["Observer Code"] == "SAH"),
            df["Magnitude"].where(df["Observer Code"] == "SAH"), color="k")
plt.show()

In [13]:
df.keys()

Index(['JD', 'Magnitude', 'Uncertainty', 'HQuncertainty', 'Band',
       'Observer Code', 'Comment Code(s)', 'Comp Star 1', 'Comp Star 2',
       'Charts', 'Comments', 'Transfomed', 'Airmass', 'Validation Flag',
       'Cmag', 'Kmag', 'HJD', 'Star Name', 'Observer Affiliation',
       'Measurement Method', 'Grouping Method', 'ADS Reference', 'Digitizer',
       'Credit', 'BJD'],
      dtype='object')

In [14]:
print(np.min(df["JD"]))
print(np.max(df["JD"]))

2440130.641
2459517.3388900002


In [12]:
df["Band"].value_counts()

V     7715
TG     454
R       26
Name: Band, dtype: int64

In [13]:
df["Observer Code"].value_counts()

SAH     3832
DKS     1937
MZK     1153
CGRD     454
KGE      324
LDJ      254
PRX      213
PTOB     178
BIZ      100
LBG       48
SRMB      26
Name: Observer Code, dtype: int64

In [23]:
# choose photometric subset

#df_subset_photo = df

#df_subset_photo = df.where(np.logical_or(df["BJD"] < 2455109,df["BJD"] > 2455535))
df_subset_photo = df.where(df["Band"] == "V")
#df_subset = df[["JD", "sdf"]]

In [24]:
# check to confirm

plt.scatter(df_subset_photo["BJD"],df_subset_photo["Magnitude"])
plt.show()

In [13]:
np.max(df_subset_photo["BJD"])

2456809.3022545683

In [14]:
df_subset_photo.keys()

Index(['index', 'JD', 'Magnitude', 'Uncertainty', 'HQuncertainty', 'Band',
       'Observer Code', 'Comment Code(s)', 'Comp Star 1', 'Comp Star 2',
       'Charts', 'Comments', 'Transfomed', 'Airmass', 'Validation Flag',
       'Cmag', 'Kmag', 'HJD', 'Star Name', 'Observer Affiliation',
       'Measurement Method', 'Grouping Method', 'ADS Reference', 'Digitizer',
       'Credit', 'BJD'],
      dtype='object')

# feature scale the photometry

### wait! scaling of error bars won't be the same, since an offset is being subtracted off

In [29]:
'''
scaler = MinMaxScaler() # min-max mapped to 0-to-1
df_subset_photo[['mag_scaled']] = scaler.fit_transform(df_subset_photo[['Magnitude']])
# df_subset_photo[['error_scaled']] = scaler.transform(df_subset_photo[['Uncertainty']])
'''

In [21]:
'''
plt.scatter(df_subset_photo["BJD"],df_subset_photo["mag_scaled"])
plt.show()
'''

# write out as 'polished' photometry

In [None]:
# cols BJD, mag, error

In [25]:
# remove rows of all NaNs

df_subset_photo_dropna = df_subset_photo.dropna(axis=0, how='all')

In [26]:
# write out only the relevant cols

df_subset_photo_dropna.to_csv(stem_public_sources + writeout_polished_file, 
                              columns=["BJD","Magnitude","Uncertainty"], index=False)
print(stem_public_sources + writeout_polished_file)

/Users/bandari/Documents/git.repos/rrlyrae_metallicity/notebooks_for_development/all_photometry_program_stars/polished/uy_cam_aavso_polished.txt
