In [77]:
# This notebook extracts feature data points from the Excel file using openpyxl and pandas.

In [78]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [79]:
class DataProcessor():
    """Load the animal-feature spreadsheet and offer small helpers to inspect it.

    The frame is held in ``self.data``; every helper checks that it has been
    loaded before using it and prints a short notice otherwise.
    """

    def __init__(self, file_path):
        """Store the file location; nothing is read until ``import_data`` runs.

        Args:
            file_path: Path of the Excel file passed to ``pd.read_excel``.
        """
        self.file_path = file_path
        self.data = None                  # Becomes a DataFrame once import_data() succeeds

    def import_data(self):
        """Read the Excel file at ``self.file_path`` into ``self.data``.

        Best-effort: any exception is caught and reported on stdout, and
        ``self.data`` keeps whatever value it had before the call.
        """
        try:
            self.data = pd.read_excel(self.file_path)
            print("File Read OK.")
        except Exception as e:
            print(f"Error in File Read: {e}")

    def output_data_structure(self):
        """Print the column names and hand back the loaded frame.

        Returns:
            ``self.data`` itself (not a copy) when data is loaded, else ``None``.
        """
        if self.data is not None:
            print("The Columns/ Attributes/ Features are :")
            for column_name in self.data.columns:
                print(column_name)
            print("A short Summary of Data Table: ")
            data_tibble = self.data
        else:
            print("Check input data.")
            data_tibble = None
        return data_tibble

    def drop_column(self, column_name):
        """Remove ``column_name`` from ``self.data``.

        ``self.data`` is rebound to the frame returned by ``DataFrame.drop``.
        If no data is loaded, or the label is not present in the frame, a
        notice is printed and ``self.data`` is left unchanged.

        Args:
            column_name: Column label (or list of labels) to drop.
        """
        if self.data is None:
            print("data not available or wrong column name.")
            return

        try:
            trimmed = self.data.drop(column_name, axis = 1)
        except KeyError:
            # DataFrame.drop raises KeyError for an unknown label; report it
            # with the same notice instead of aborting the cell.
            print("data not available or wrong column name.")
            return

        self.data = trimmed
        print(f"Column '{column_name}' dropped successfully")

    def create_pair_plot(self):
        """Draw a seaborn pair plot of ``self.data`` and show it.

        Prints a notice instead when no data is loaded.
        """
        if self.data is not None:
            sns.pairplot(self.data, kind='scatter', plot_kws={'alpha':0.5})
            # y slightly above 1 places the title above the top row of panels.
            plt.suptitle('Pair Plot of Animal Features', y=1.03)
            plt.show()
        else:
            print("Check input data.")

In [80]:
class DataValidation():
    """Holds the class-type spreadsheet used to compare against our output."""

    def __init__(self, file_path):
        """Record the spreadsheet location; the frame stays empty until loaded."""
        self.data = None
        self.file_path = file_path

    def import_data(self):
        """Load the Excel file into ``self.data``, reporting the outcome on stdout."""
        try:
            loaded = pd.read_excel(self.file_path)
        except Exception as e:
            print(f"Error in File Read: {e}")
        else:
            self.data = loaded
            print("File Read OK.")

In [81]:
# Path of the input Excel file holding the animal features.
# NOTE(review): absolute, machine-specific path — consider a configurable location.
file_path = r"C:\Users\Akash Mittal\Documents\Zoo_Lab\zoo.xlsx"

In [82]:
dataprocess = DataProcessor(file_path)  # Create the DataProcessor object for the data used in the analysis

In [83]:
dataprocess.import_data()  # Import the data from the Excel file

File Read OK.


In [84]:
dataprocess.output_data_structure()  # Print the column names; the returned DataFrame is shown as the cell output

The Columns/ Attributes/ Features are :
animal_name
hair
feathers
eggs
milk
airborne
aquatic
predator
toothed
backbone
breathes
venomous
fins
legs
tail
domestic
catsize
class_type
A short Summary of Data Table: 


Unnamed: 0,animal_name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,class_type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,wallaby,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,1,1
97,wasp,1,0,1,0,1,0,0,0,0,1,1,0,6,0,0,0,6
98,wolf,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
99,worm,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,7


In [85]:
dataprocess.drop_column('class_type')  # Drop the 'class_type' column

Column 'class_type' dropped successfully


In [86]:
print(dataprocess.data)  # Plain-text dump of the frame after dropping 'class_type'

    animal_name  hair  feathers  eggs  milk  airborne  aquatic  predator  \
0      aardvark     1         0     0     1         0        0         1   
1      antelope     1         0     0     1         0        0         0   
2          bass     0         0     1     0         0        1         1   
3          bear     1         0     0     1         0        0         1   
4          boar     1         0     0     1         0        0         1   
..          ...   ...       ...   ...   ...       ...      ...       ...   
96      wallaby     1         0     0     1         0        0         0   
97         wasp     1         0     1     0         1        0         0   
98         wolf     1         0     0     1         0        0         1   
99         worm     0         0     1     0         0        0         0   
100        wren     0         1     1     0         1        0         0   

     toothed  backbone  breathes  venomous  fins  legs  tail  domestic  \
0          1 

In [87]:
dataprocess.output_data_structure()  # Print the column names again, now without 'class_type'

The Columns/ Attributes/ Features are :
animal_name
hair
feathers
eggs
milk
airborne
aquatic
predator
toothed
backbone
breathes
venomous
fins
legs
tail
domestic
catsize
A short Summary of Data Table: 


Unnamed: 0,animal_name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,wallaby,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,1
97,wasp,1,0,1,0,1,0,0,0,0,1,1,0,6,0,0,0
98,wolf,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1
99,worm,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0


In [89]:
print(dataprocess.data)  # Plain-text dump of the current frame

    animal_name  hair  feathers  eggs  milk  airborne  aquatic  predator  \
0      aardvark     1         0     0     1         0        0         1   
1      antelope     1         0     0     1         0        0         0   
2          bass     0         0     1     0         0        1         1   
3          bear     1         0     0     1         0        0         1   
4          boar     1         0     0     1         0        0         1   
..          ...   ...       ...   ...   ...       ...      ...       ...   
96      wallaby     1         0     0     1         0        0         0   
97         wasp     1         0     1     0         1        0         0   
98         wolf     1         0     0     1         0        0         1   
99         worm     0         0     1     0         0        0         0   
100        wren     0         1     1     0         1        0         0   

     toothed  backbone  breathes  venomous  fins  legs  tail  domestic  \
0          1 

In [90]:
# dataprocess.create_pair_plot()  # Creates a pairwise scatter plot of the data points (currently disabled)

In [91]:
# The following code has been incorporated into the classes above, so it is commented out; it is kept only for my own reference, to show how I built it up.

"""
# let's read the excel files
# Using r" " for the file path so that there are no issues with the / slashes in address of file path.
# class.xlsx includes the data about animals and their corresponding classes.
# zoo.xlsx includes the animal features for each animal. 

zoo_class = r"C:\Users\Akash Mittal\Documents\Zoo_Lab\class.xlsx"

animals = r"C:\Users\Akash Mittal\Documents\Zoo_Lab\zoo.xlsx"

# Creating a dataframe, df_zoo_class for the Zoo_Class
df_zoo_class = pd.read_excel(zoo_class)

# Creating a dataframe, df_animals for the animal features.
df_animals = pd.read_excel(animals)

df_zoo_class

df_animals.head()

#head returns a concise set of data from the dataframe

#### The dataframe also has class column, so, let's drop that column from the dataframe to have a dataframe specific for analysis.

df_animals_analysis = df_animals.drop('class_type', axis=1) # Axis = 1 for dropping a column, else for a row, Axis=0 

data_size_cols = anim_features.shape[1] # Counting the number of features in the data-frame
print(data_size_cols)

# Segregating the Animal Features in anim_features.

anim_features = df_animals_analysis[["hair", "feathers", "eggs", "milk", "airborne", "aquatic", "predator", "toothed", "backbone", "breathes", "venomous", "fins", "legs", "tail", "domestic", "catsize"]]

print(anim_features)

# Creating Exemplary Scatter PLots of some data points here 

data_size_rows = anim_features.shape[0] # Counting the number of elements in the data-frame
print(data_size_rows)

data_cols_names = anim_features.columns

data_cols_names

### Using Seaborn and MatplotLib to create pairwise scatter Plots of the Various Animal Features

# Selecting Numeric Columns from the df_animals_analysis dataframe

df_analysis_numeric = df_animals_analysis.select_dtypes(include='number')

sns.pairplot(df_animals, kind='scatter', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of Anime Features', y=1.02)
plt.show()

"""

"""
# Another Way to make the plot
sns.pairplot(df_animals)
plt.suptitle('Pair Plot of Anime Features', y=1.02)
plt.show()
"""
