In [3]:
import seaborn as sns
import numpy as np
import pandas as pd
import scipy.linalg as la
import math
import matplotlib
import matplotlib.pyplot as plt

### Project Focus:

Fit a regression line to the provided data to estimate and predict abalone age based on non-destructive methods.
<br>
Provide reasoning for the multivariate techniques used, as well as the assumptions that make them valid to use.

### Credentials

Data comes from an original study:

	Warwick J Nash, Tracy L Sellers, Simon R Talbot, Andrew J Cawthorn and
	Wes B Ford (1994) "The Population Biology of Abalone (_Haliotis_
	species) in Tasmania. I. Blacklip Abalone (_H. rubra_) from the North
	Coast and Islands of Bass Strait", Sea Fisheries Division, Technical
	Report No. 48 (ISSN 1034-3288)
    
Original owners of database:
    
	Marine Resources Division
	Marine Research Laboratories - Taroona
	Department of Primary Industry and Fisheries, Tasmania
	GPO Box 619F, Hobart, Tasmania 7001, Australia
	(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)

### Cleaning Data

The focus is to estimate mean abalone age using ordinary least squares regression on physical attributes that can be measured without killing the abalone.
<br>
Attribute information:


   



In [47]:
# Load the abalone measurements into a dataframe. The column headers are
# supplied explicitly through `names`.
Aba_data = pd.read_csv(
    "abalone.data",
    names=[
        'Sex',
        'Length',
        'Diameter',
        'Height',
        'Whole Weight',
        'Shucked Weight',
        'Viscera Weight',
        'Shell Weight',
        'Ring Count',
    ],
)

# Separate the male, female, and infant abalone to minimize confounding factors.
# Each subset is built with the same three-step chain:
#   1. keep only the rows whose 'Sex' value matches the category,
#   2. drop the 'Sex' column, which is redundant once the rows are separated,
#   3. renumber the rows from 0, discarding the old (now gappy) index.
Aba_data_male = (
    Aba_data[Aba_data['Sex'] == 'M']
    .drop(columns='Sex')
    .reset_index(drop=True)
)
Aba_data_female = (
    Aba_data[Aba_data['Sex'] == 'F']
    .drop(columns='Sex')
    .reset_index(drop=True)
)
Aba_data_inf = (
    Aba_data[Aba_data['Sex'] == 'I']
    .drop(columns='Sex')
    .reset_index(drop=True)
)

# Show the three per-sex dataframes one after another.
display(Aba_data_male, Aba_data_female, Aba_data_inf)


Unnamed: 0,Length,Diameter,Height,Whole Weight,Shucked Weight,Viscera Weight,Shell Weight,Ring Count
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
3,0.475,0.370,0.125,0.5095,0.2165,0.1125,0.1650,9
4,0.430,0.350,0.110,0.4060,0.1675,0.0810,0.1350,10
...,...,...,...,...,...,...,...,...
1523,0.550,0.430,0.130,0.8395,0.3155,0.1955,0.2405,10
1524,0.560,0.430,0.155,0.8675,0.4000,0.1720,0.2290,8
1525,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
1526,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9


Unnamed: 0,Length,Diameter,Height,Whole Weight,Shucked Weight,Viscera Weight,Shell Weight,Ring Count
0,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
1,0.530,0.415,0.150,0.7775,0.2370,0.1415,0.3300,20
2,0.545,0.425,0.125,0.7680,0.2940,0.1495,0.2600,16
3,0.550,0.440,0.150,0.8945,0.3145,0.1510,0.3200,19
4,0.525,0.380,0.140,0.6065,0.1940,0.1475,0.2100,14
...,...,...,...,...,...,...,...,...
1302,0.585,0.475,0.165,1.0530,0.4580,0.2170,0.3000,11
1303,0.585,0.455,0.170,0.9945,0.4255,0.2630,0.2845,11
1304,0.515,0.400,0.125,0.6150,0.2865,0.1230,0.1765,8
1305,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11


Unnamed: 0,Length,Diameter,Height,Whole Weight,Shucked Weight,Viscera Weight,Shell Weight,Ring Count
0,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.055,7
1,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.120,8
2,0.355,0.280,0.085,0.2905,0.0950,0.0395,0.115,7
3,0.380,0.275,0.100,0.2255,0.0800,0.0490,0.085,10
4,0.240,0.175,0.045,0.0700,0.0315,0.0235,0.020,5
...,...,...,...,...,...,...,...,...
1337,0.480,0.355,0.110,0.4495,0.2010,0.0890,0.140,8
1338,0.390,0.310,0.085,0.3440,0.1810,0.0695,0.079,7
1339,0.390,0.290,0.100,0.2845,0.1255,0.0635,0.081,7
1340,0.405,0.300,0.085,0.3035,0.1500,0.0505,0.088,7
