<h1 style="text-align:center">COMP20270 OOP in Python, Assignment 2: MyDataFrame<br>Brian Manning</h1>

___
## MySeries Class
- This class is used to create an indexed series stored as a dict
___
- It has methods to do the following:
    1. __min__: Calculate the minimum value in the series
    2. __max__: Calculate the maximum value in the series
    3. __mean__: Calculate the mean of the series
    4. __print__: Prints each index label alongside its value
    5. __item_at_ind__: Prints the value stored at a given index label

- It has the following properties:
    1. __s_dict__: This is a dictionary which stores the inputted series

In [None]:
class MySeries():
    """An indexed series of values stored as a dictionary.

    The series is kept in ``s_dict``, mapping each index label to its value.

    Args:
        s_dict: Either a dictionary (used as-is, label -> value) or a list of
            values.
        **kwargs: ``index`` may be given alongside a list of values; it must
            be a list of labels with the same length as the values. Without
            it the labels default to 0, 1, 2, ... An ``index`` passed together
            with a dictionary is ignored.

    Raises:
        ValueError: If ``s_dict`` is neither a list nor a dictionary, or if
            ``index`` is not a list or has the wrong length.
    """

    def __init__(self, s_dict, **kwargs):
        if isinstance(s_dict, dict):
            # a dictionary already carries its own labels
            self.s_dict = s_dict
        elif isinstance(s_dict, list):
            if 'index' not in kwargs:
                # no labels supplied: number the values from zero
                self.s_dict = dict(enumerate(s_dict))
                return
            index = kwargs['index']
            if not isinstance(index, list):
                raise ValueError("Index must be of type list.")
            if len(index) != len(s_dict):
                raise ValueError("Index must be the same length as the series.")
            self.s_dict = dict(zip(index, s_dict))
        else:
            raise ValueError("Incorrect input to MySeries class.\nYou must input either a list or a dictionary.")

    def min(self):
        """Return the smallest value in the series.

        For a series of strings this is the string that sorts first. If the
        values cannot be compared with each other, or the series is empty, a
        message is printed and ``None`` is returned.
        """
        try:
            return min(self.s_dict.values())
        except (TypeError, ValueError):
            # TypeError: values of mixed, non-comparable types
            # ValueError: empty series
            print("Could not calculate the minimum of the given values")
            return None

    def max(self):
        """Return the largest value in the series.

        For a series of strings this is the string that sorts last. If the
        values cannot be compared with each other, or the series is empty, a
        message is printed and ``None`` is returned.
        """
        try:
            return max(self.s_dict.values())
        except (TypeError, ValueError):
            # TypeError: values of mixed, non-comparable types
            # ValueError: empty series
            print("Could not calculate the maximum of the given values")
            return None

    def mean(self):
        """Return the arithmetic mean of the series as a float.

        Raises:
            ValueError: If the values cannot be summed (e.g. strings) or the
                series is empty.
        """
        try:
            return float(sum(self.s_dict.values())) / len(self.s_dict)
        except (TypeError, ValueError, ArithmeticError) as err:
            # ArithmeticError covers the division by zero of an empty series
            raise ValueError("Could not calculate the mean of the given values.") from err

    def print(self):
        """Print one line per entry: the right-aligned label, a tab, the value.

        Numeric values are shown with one decimal place; anything that cannot
        be formatted as a number is right-aligned to the widest value instead.
        """
        # the widths are the same for every row, so work them out once
        key_width = max((len(str(k)) for k in self.s_dict), default=0)
        val_width = max((len(str(v)) for v in self.s_dict.values()), default=0)
        for key, value in self.s_dict.items():
            label = "{0:>{1}}".format(key, key_width)
            try:
                text = "{:.1f}".format(value)
            except (TypeError, ValueError):
                # not a number: fall back to plain right-aligned text
                text = "{0:>{1}}".format(value, val_width)
            print(label, '\t', text)

    def item_at_ind(self, key):
        """Print the value stored under ``key``, or list the valid labels."""
        if key in self.s_dict:
            print(self.s_dict[key])
        else:
            print(f"\'{key}\' not in index.\n")
            print(f"The following keys may be used:\n{self.s_dict.keys()}")

___
<div class="alert alert-block alert-success"><h2 style="text-align:center">MySeries - Sample 1</h2></div>

In [None]:
# Build a series from a list of values with explicit index labels,
# then show the underlying label -> value dictionary.
ms3 = MySeries([1,2,1], index = ['a','b','c'])
ms3.s_dict

In [None]:
# Summary statistics of ms3 (values 1, 2, 1)
print('Minimum value:',ms3.min())
print('Maximum value:',ms3.max())
print('Mean value:',ms3.mean())

In [None]:
# Print every label of ms3 next to its value
ms3.print()

In [None]:
# Look up a single value by its index label
ms3.item_at_ind('c')

___
<div class="alert alert-block alert-success"><h2 style="text-align:center">MySeries - Sample 2</h2></div>

In [None]:
# Build a series from a list without an index: labels default to 0, 1, 2
ms4 = MySeries([4,5,6])
ms4.s_dict

In [None]:
# Summary statistics of ms4 (values 4, 5, 6)
print('Minimum value:',ms4.min())
print('Maximum value:',ms4.max())
print('Mean value:',ms4.mean())

In [None]:
# Print every label of ms4 next to its value
ms4.print()

___
<div class="alert alert-block alert-success"><h2 style="text-align:center">MySeries - Sample 3</h2></div>

In [None]:
# Build a series directly from a dictionary: its keys become the index labels
d = {'b': 1, 'a': 0, 'c': 2}
s2 = MySeries(d)
s2.s_dict

In [None]:
# Summary statistics of s2 (values 1, 0, 2)
print('Minimum value:',s2.min())
print('Maximum value:',s2.max())
print('Mean value:   ',s2.mean())

In [None]:
# Print every label of s2 next to its value, in insertion order (b, a, c)
s2.print()

___
## MyDataFrame Class
- This class is used to create a basic data frame, with columns from the MySeries class
___
- It has methods to do the following:
    1. __min__: Calculate the minimum value of each of the series
    2. __max__: Calculate the maximum value of each of the series
    3. __mean__: Calculate the mean of each of the series
    4. __print__: Prints the data frame, with the rows in their current sort order
    5. __sort_values__: Sorts the rows in ascending order of the values in the given column; takes an optional reverse_val parameter which reverses the sort when it is True (default False)
    6. `__repr__`: Shows the data frame when only the instance name is evaluated (to replicate pandas)
    7. `__calculate`: Private helper that calculates either the min, max, or mean of each column, based on the type passed
    8. `__calc_max_len`: Private helper that calculates the maximum text length in a column, to be used when printing


- It has the following properties:
    1. __dict__: This is a dictionary which stores the inputted series
    1. __index__: Stores the row headings of the data


In [None]:
import operator

class MyDataFrame():
    """A basic data frame whose columns are MySeries instances.

    Attributes:
        dict: Maps each column heading to the MySeries holding that column.
        index: The row labels, in the order in which rows are printed.

    Args:
        d: Dictionary mapping column headings to the column data.
        **kwargs: ``index`` may be a list of row labels, one per row. Without
            it the row labels are taken from the first column's series
            (0, 1, 2, ... when the columns are lists).

    Raises:
        ValueError: If ``d`` is not a dictionary, if ``index`` is not a list
            or does not match the column length, or if the columns differ in
            length.
    """

    def __init__(self, d, **kwargs):
        if not isinstance(d, dict):
            raise ValueError('Error input not dict')

        if 'index' in kwargs:
            index = kwargs['index']
            if not isinstance(index, list):
                # previously this case silently produced an unusable frame
                raise ValueError('Index must be of type list.')
            for column in d.values():
                if isinstance(column, (list, dict)) and len(column) != len(index):
                    raise ValueError('Index inputted not of same length')
            # copy so later changes to the caller's list cannot alter the frame
            self.index = list(index)
            self.dict = {key: MySeries(column, index=self.index)
                         for key, column in d.items()}
        else:
            self.dict = {key: MySeries(column) for key, column in d.items()}
            all_series = list(self.dict.values())
            # one label per ROW (not per column): reuse the labels that the
            # first column's series was given
            self.index = list(all_series[0].s_dict) if all_series else []
            if any(len(series.s_dict) != len(self.index) for series in all_series):
                raise ValueError('All columns must be the same length.')

    def __repr__(self):
        """Return the table text, so evaluating the bare instance name shows it."""
        return self._format_table()

    def print(self, calculation=None):
        """Print the frame as a tab-separated table in the current row order.

        Args:
            calculation: Unused; kept so existing calls keep working.
        """
        # no trailing newline, matching the layout this method always produced
        print(self._format_table(), end="")

    def _format_table(self):
        """Build the table text: a header line, then one line per row label."""
        columns = list(self.dict)
        if not columns:
            return ""
        widths = {key: self.__calc_max_len(key) for key in columns}
        # the header starts with a tab to leave room for the row labels
        header = "\t" + "".join(
            "{0:>{1}}".format(str(key), widths[key]) + "\t" for key in columns
        )
        lines = [header]
        for row_name in self.index:
            cells = "".join(
                "{0:>{1}}".format(self.dict[key].s_dict[row_name], widths[key]) + "\t"
                for key in columns
            )
            lines.append(str(row_name) + "\t" + cells)
        return "\n".join(lines)

    def sort_values(self, column_head, reverse_val=False):
        """Reorder the rows by the values in one column.

        Only the row order (``index``) changes; the column data is untouched.

        Args:
            column_head: Heading of the column to sort by.
            reverse_val: Sort in descending order when True (default False).
        """
        column = self.dict[column_head].s_dict
        self.index = sorted(column, key=column.get, reverse=reverse_val)

    def min(self):
        """Print the minimum of every column that can be shown as a number."""
        self.__calculate('min')

    def max(self):
        """Print the maximum of every column that can be shown as a number."""
        self.__calculate('max')

    def mean(self):
        """Print the mean of every column that can be shown as a number."""
        self.__calculate('mean')

    # kept private because it calls a series method chosen by name
    def __calculate(self, kind):
        """Print ``kind`` ('min', 'max' or 'mean') of each column, one per line.

        Columns whose result cannot be formatted as a number with two decimal
        places (for example text columns) are left out of the output.
        """
        label_width = max((len(str(key)) for key in self.dict), default=0)
        # operator.methodcaller lets min, max and mean share this one loop
        compute = operator.methodcaller(kind)
        for key, series in self.dict.items():
            try:
                formatted = "{:.2f}".format(compute(series))
            except (TypeError, ValueError):
                # non-numeric column: skip it so the other columns still print
                continue
            print("{0:>{1}}".format(str(key), label_width), "{:>11}".format(formatted))

    def __calc_max_len(self, key):
        """Return the print width of a column: its widest value or its heading."""
        widest_value = max(
            (len(str(v)) for v in self.dict[key].s_dict.values()), default=0
        )
        return max(widest_value, len(str(key)))

___
<div class="alert alert-block alert-success"><h2 style="text-align:center">Dataframe - Sample 1</h2></div>

In [None]:
# Sample weather data: one list of four readings per column heading
d = { 'Sun Hours' : [ 4.5 , 4.0 , 5.1 , 5 ],
'Max Temp' : [ 19.6 , 19.1 , 19.6 , 20.0 ],
'Min Temp' : [ 12.7 , 12.5 , 13.3 , 12.1 ],
'Rain (mm)' : [ 82 , 109 , 65 , 76 ],
'Rain Days' : [ 13 , 20 , 10 , 9.7 ]}

In [None]:
# Build a frame without row labels, then display it
df1 = MyDataFrame(d)
df1

In [None]:
# Build the same frame with one named row label per reading, then display it
df2 = MyDataFrame(d, index = ['Clare', 'Galway','Dublin', 'Wexford'])
df2

In [None]:
# Order the rows by ascending rainfall (65, 76, 82, 109) and print the result
df2.sort_values('Rain (mm)')
df2.print()

In [None]:
# Print the maximum of each column
df2.max()

In [None]:
# Print the minimum of each column
df2.min()

In [None]:
# Print the mean of each column
df2.mean()

___
<div class="alert alert-block alert-success"><h2 style="text-align:center">Dataframe - Sample 2</h2></div>

In [None]:
# Sample film data: four numeric columns and one text column ('Main Genre')
films = {'Rank': [112,62,41,172,230,176],
        'Release Year': [1973,1980,1960,2015,1976,1996],
        'IMDB Rating': [8.3,8.4,8.5,8.1,8.1,8.1],
        'Time (minutes)': [129,146,109,118,120,98],
        'Main Genre': ['Comedy','Horror','Horror','Drama','Drama','Drama']}
# Row labels: one film name per entry in each column
f_names = ['Sting','Shining', 'Psycho','Room','Rocky','Fargo']

# Build the frame with the film names as row labels, then display it
films_df =  MyDataFrame(films, index = f_names) 
films_df

In [None]:
# Print the frame in its current row order
films_df.print()

In [None]:
# Order the rows from the oldest release year to the newest and print them
films_df.sort_values('Release Year')
films_df.print()

In [None]:
# Same sort reversed: newest release year first
films_df.sort_values('Release Year',reverse_val=True)
films_df.print()

In [None]:
# Print the minimum of each numeric column; the text column 'Main Genre' is omitted
films_df.min()

In [None]:
# Print the maximum of each numeric column; the text column 'Main Genre' is omitted
films_df.max()

In [None]:
# Print the mean of each numeric column; the text column 'Main Genre' is omitted
films_df.mean()

___