In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# The most robust approach is to select the columns by their positional index
def read_lc3(flpath,data_dict,header_nrows,delim_whitespace):

    '''Read a light-curve (lc) file and return it as a standardized DataFrame.

    Columns are picked by positional index, so files with different layouts
    can be mapped onto the same output column names.

    Input:

    flpath: the location of the file (anything pandas.read_csv accepts:
            a path, a URL or a file-like object)

    data_dict: dictionary with the keys
               ('id','time','mag','mag_err','flux','flux_err','filters');
               the user provides the zero-based column index for each key,
               e.g. {'time':1} means the time is in the second column.
               * 'id' and 'time' are required.
               * one brightness key is required; 'mag' is used when present,
                 otherwise 'flux'.
               * a brightness (or error) value may be a list of indices for
                 files with several brightness columns; the output columns
                 are then numbered, e.g. 'flux1', 'flux2', ...
               * 'mag_err' / 'flux_err' and 'filters' are optional.

    header_nrows: the number of rows at the top of the file to skip

    delim_whitespace: True when the columns are separated by whitespace,
                      False when they are separated by commas

    Output:

    pandas DataFrame with the columns 'ID', 'time', the brightness column(s)
    ('mag'/'flux', or 'mag1', 'mag2', ... for a list), then '<unit>_error'
    and 'filters' when they were requested.

    Raises:

    KeyError when data_dict lacks 'id', 'time' or both of 'mag' and 'flux'.'''

    # Magnitudes take precedence over fluxes when both are described
    if 'mag' in data_dict:
        unit = 'mag'
    elif 'flux' in data_dict:
        unit = 'flux'
    else:
        raise KeyError("data_dict must contain either a 'mag' or a 'flux' entry")

    # Reading-in the data.
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True, which
    # is deprecated in recent pandas releases; otherwise the default comma is used.
    read_options = {'skiprows': header_nrows, 'header': None}
    if delim_whitespace:
        read_options['sep'] = r'\s+'
    data = pd.read_csv(flpath, **read_options)

    # Insertion order below fixes the column order of the returned DataFrame
    standard_data = {'ID': data.iloc[:, data_dict['id']],
                     'time': data.iloc[:, data_dict['time']]}

    # Brightness column(s)
    standard_data.update(_lc_columns(data, data_dict[unit], unit))

    # Optional brightness error column(s)
    err_key = unit + '_err'
    if err_key in data_dict:
        standard_data.update(_lc_columns(data, data_dict[err_key], unit + '_error'))

    # Optional filter/passband column (kept for multi-column files as well)
    if 'filters' in data_dict:
        standard_data['filters'] = data.iloc[:, data_dict['filters']]

    return pd.DataFrame.from_dict(standard_data)


def _lc_columns(data, spec, name):
    '''Map output names to columns for one column index or a list of indices.'''
    if isinstance(spec, (list, tuple)):
        # Several columns: number them from 1, e.g. 'flux1', 'flux2', ...
        return {name + str(n): data.iloc[:, col] for n, col in enumerate(spec, start=1)}
    return {name: data.iloc[:, spec]}

# Testing the function on three available files

In [5]:
# Remote files from the MANTRA repository (light curves, labels, info)
url = "https://raw.githubusercontent.com/MachineLearningUniandes/MANTRA/master/data/lightcurves/transient_lightcurves.csv"
url1 = 'https://raw.githubusercontent.com/MachineLearningUniandes/MANTRA/master/data/lightcurves/transient_labels.csv'
url2 = 'https://raw.githubusercontent.com/MachineLearningUniandes/MANTRA/master/data/lightcurves/transient_info.txt'

# Local data directory: edit this single line to run the notebook on another machine
DATA_DIR = '/home/malema/Desktop/Malema_UWC_Work/Data'

path1 = DATA_DIR + '/20121012_02331333_O_CrabNebula_E.dat' # Oseti
path2 = DATA_DIR + '/test_set_batch1.csv'  # Plastic
path3 = url  # CRTS

# Whitespace-separated file, two header rows, two flux columns (indices 2 and 3)
oSETI_dt=read_lc3(flpath=path1,data_dict={'time':1,'flux':[2,3],'id':5},delim_whitespace=True,header_nrows=2)

# Comma-separated file, one header row, flux with errors and a filters column
plasticc_dt=read_lc3(flpath=path2,data_dict={'time':1,'flux':3,'flux_err':4,'id':0,'filters':2},
                     delim_whitespace=False,header_nrows=1)

# Comma-separated file read straight from the URL, magnitudes with errors
CRTS_dt=read_lc3(flpath=path3,data_dict={'time':4,'mag':2,'mag_err':3,'id':0},
                     delim_whitespace=False,header_nrows=1)

# Checking the header

In [6]:
# Show the first rows of each standardized light-curve table, one survey at a time
for label, frame in (('CRTS \n', CRTS_dt), ('oSETI \n', oSETI_dt), ('Plastc \n', plasticc_dt)):
    print(label, frame.head(), '\n')

CRTS 
                           ID          time      mag  mag_error
0  TranID1409030010044114444  53766.089871  18.8765   0.166417
1  TranID1409030010044114444  53990.458866  20.0519   0.281733
2  TranID1409030010044114444  53996.286004  20.2199   0.295764
3  TranID1409030010044114444  54385.205789  21.1192   0.495390
4  TranID1409030010044114444  54355.282285  19.3289   0.195002 

oSETI 
      ID      time     flux1     flux2
0  8607  0.106574  0.000082 -0.102021
1  8608  0.106574  0.000017 -0.108044
2  8609  0.106574  0.000017 -0.094419
3  8610  0.106574  0.000050 -0.101473
4  8611  0.106574  0.000050 -0.103084 

Plastc 
    ID        time      flux  flux_error  filters
0  13  59798.3205 -1.299735    1.357315        2
1  13  59798.3281 -2.095392    1.148654        1
2  13  59798.3357 -0.923794    1.763655        3
3  13  59798.3466 -4.009815    2.602911        4
4  13  59798.3576 -3.403503    5.367328        5 

