In [None]:
import numpy as np
import matplotlib.pyplot as pl
import pandas as pd
import os 
import os.path as osp
import itertools
import astropy.io.fits as fits

from sources.preprocessor import data_processor # A well defined function to sample columns of interest from a full catalogue
import missingno as msno

# plot style
import seaborn as sns
sns.set_style("ticks")

In [None]:
# Read the catalogue of unclassified sources from the working directory.
# (The CSV was produced earlier with:
#      noclass.to_csv('unclassified.csv', index = False, header=True))
noclass = pd.read_csv(filepath_or_buffer='unclassified.csv')

In [None]:
# Visualise missingness in the raw catalogue with missingno's nullity matrix.
# NOTE(review): the y-label text below reads as if it were written for
# msno.bar (per-column counts), yet msno.matrix is what is called here —
# confirm which plot is intended and adjust the label or the call.
ax = msno.matrix(noclass, figsize=(12, 8), fontsize=22)

# Title and axis labels
ax.set_title('Missing Data Overview', fontsize=22)
ax.set_ylabel('Count of non-null values', fontsize=20)
ax.set_xlabel('Features', fontsize=20)

# Remove the right-hand spine of the main axes
ax.spines['right'].set_visible(False)

# Render the figure
pl.show()

In [None]:
# Build the mid-infrared colour features and a cleaned feature table.
#
# In the catalogue the IRAC fluxes S3.6, S4.5, S5.8 and S8.0 are stored as
# SPLASH_1_FLUX, SPLASH_2_FLUX, SPLASH_3_FLUX and SPLASH_4_FLUX respectively.
#
# The -99 placeholder is masked in the flux columns *before* any ratio is
# formed. If it were left in place, a row with -99 in both fluxes would give
# -99 / -99 = 1 and therefore log10(1) = 0: a perfectly valid-looking colour
# that the later "-99 -> NaN" replacement could never catch.
_irac_flux = noclass[['SPLASH_1_FLUX', 'SPLASH_2_FLUX',
                      'SPLASH_3_FLUX', 'SPLASH_4_FLUX']].replace(-99, np.nan)

# Log flux ratios (colours). Zero or negative fluxes still produce +/-inf or
# NaN here; the infinities are turned into NaN further down.
S8_S45 = np.log10(_irac_flux['SPLASH_4_FLUX'] / _irac_flux['SPLASH_2_FLUX'])
S58_S36 = np.log10(_irac_flux['SPLASH_3_FLUX'] / _irac_flux['SPLASH_1_FLUX'])
S45_S36 = np.log10(_irac_flux['SPLASH_2_FLUX'] / _irac_flux['SPLASH_1_FLUX'])

# Two-column array [log(S8/S45), log(S58/S36)], one row per source.
mid_data = np.vstack([S8_S45.to_numpy(),
                      S58_S36.to_numpy()]).T

# Replace the raw fluxes (and the L14 / LIR_WHz columns) by the colour features.
data = noclass.drop(columns=['SPLASH_1_FLUX', 'SPLASH_2_FLUX', 'SPLASH_3_FLUX',
                             'SPLASH_4_FLUX', 'L14', 'LIR_WHz'])
data['log(S8/S45)'] = S8_S45
data['log(S58/S36)'] = S58_S36
data['log(S45/S36)'] = S45_S36

# Convert the remaining -99 placeholders and any +/-inf to NaN everywhere.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0, and no
# regex flag is passed since the values being replaced are plain numbers.
# rename() is chained (rather than inplace=True) so that re-running the cell
# always rebuilds data1 from data.
data1 = (
    data
    .replace([-np.inf, -99, np.inf], np.nan)
    .rename(columns={'AGN': 'class_labels', 'MASS_lephare': 'Mstar'})
)


In [None]:
# Remove the label / classification columns from the feature table.
# Note: drop() raises KeyError for labels that are absent, so this cell
# cannot be re-run on its own without first rebuilding data1.
data1 = data1.drop(columns=["class_labels", "SFG", "unclass", "probSFG"])


In [None]:
# Visualise missingness in the cleaned feature table with missingno's
# nullity matrix.
# NOTE(review): the y-label text below reads as if it were written for
# msno.bar (per-column counts), yet msno.matrix is what is called here —
# confirm which plot is intended and adjust the label or the call.
ax = msno.matrix(data1, figsize=(12, 8), fontsize=22)

# Title and axis labels
ax.set_title('Missing Data Overview', fontsize=22)
ax.set_ylabel('Count of non-null values', fontsize=20)
ax.set_xlabel('Features', fontsize=20)

# Remove the right-hand spine of the main axes
ax.spines['right'].set_visible(False)

# Render the figure
pl.show()