In [22]:
class ListV2:
    def __init__(self, values):    
        self.values = list(values)
        
    def __add__(self, other):
        if len(self.values) != len(other.values):
            raise ValueError("Lists must have same length")
        return ListV2([self.values[i] + other.values[i] for i in range(len(self.values))])
    
    def __sub__(self, other):
        if len(self.values) != len(other.values):
            raise ValueError("Lists must have same length")
        return ListV2([self.values[i] - other.values[i] for i in range(len(self.values))])
    
    def __mul__(self, other):
        if len(self.values) != len(other.values):
            raise ValueError("Lists must have same length")
        return ListV2([self.values[i] * other.values[i] for i in range(len(self.values))])
    
    def __truediv__(self, other):
        if len(self.values) != len(other.values):
            raise ValueError("Lists must have same length")
        if 0 in other:
            raise ValueError("Division by zero not allowed.")
        return ListV2([self.values[i] / other.values[i] for i in range(len(self.values))])
    
    def append(self, value):
        self.values.append(value)
    
    def mean(self):
        print("sum", sum(self.values), len(self.values))
        return sum(self.values) / len(self.values)
    
    def __iter__(self):
        self.current_index = 0
        return self
    
    def __next__(self):
        if self.current_index >= len(self.values):
            raise StopIteration
        value = self.values[self.current_index]
        self.current_index += 1
        return value
    
    def __repr__(self):
        return f"ListV2({self.values})"
    
    
class DataFrame:
    def __init__(self, data, columns):
        self.index = {}
        self.data = {}
        self.columns = list(columns)
        
        for i, col in enumerate(columns):
            
            self.data[col] = ListV2([ele[i] for ele in data])
            print("data",self.data[col])  
        
        for i, row in enumerate(data):
            self.index[row[0]] = i
    
    def set_index(self, col):
        self.index = {}
        for i, val in enumerate(self.data[col].values):
            self.index[val] = i
    
    def __setitem__(self, key, value):
        self.data[key] = value
    
    def __getitem__(self, key):
        if isinstance(key, str):
            return self.data[key][self.index[key]]
        else:
            return DataFrame([self.get_row(i) for i in key], self.columns)
    
    def loc(self, label):
        return self.get_row(self.index[label])
    
    def iteritems(self):
        for col in self.columns:
            yield col, self.data[col]
    
    def iterrows(self):
        for label in self.index:
            yield label, self.get_row(self.index[label])
    
    def as_type(self, dtype):
        for col in self.columns:
            self.data[col] = ListV2([dtype(ele) for ele in self.data[col].values])
    
    def drop(self, labels):
        if isinstance(labels, str):
            labels = [labels]
        for label in labels:
            del self.data[label]
            del self.index[self.data[label].values[0]]
            self.columns.remove(label)
    
    def mean(self):
        return {col: sum(self.data[col].values) / len(self.data[col].values) for col in self.columns[1:]}
    
    def __repr__(self):
        # rows = [[label] + self.get_row(i) for label, i in self.index.items()]
        # column_widths = [max(len(str(row[i])) for row in rows) for i in range(len(self.columns))]
        # header = [col.ljust(column_widths[i]) for i, col in enumerate(self.columns)]
        # row_strings = [header] + ["," * column_widths[i] for i in range(len(self.columns))] + \
        #               [[str(row[i]).ljust(column_widths[i]) for i in range(len(self.columns))] for row in rows]
        # return "\n".join([" ".join(row) for row in row_strings])
        header = [""] + self.columns
        row_strings = ','.join(header) + "\n"
        for i, key in enumerate(self.index.keys()):
            row_strings += str(key) + ',' + ','.join(str(ele) for ele in self.get_row(i)) + "\n"
        print(row_strings)
        return row_strings[:-1]
    
    def get_row(self, index):
        return [self.data[col].values[index] for col in self.columns]

In [5]:
x = [1, 2, 3, 4]
l1 = ListV2(x)
expected_output = 2.5
expected_output == l1.mean()

sum 10 4


True

In [6]:
x.append(5)
l1.append(5)
expected_output = 3.0
expected_output == l1.mean()

sum 15 5


True

In [25]:
columns = ('StudentName', 'E1', 'E2', 'E3', 'E4','E5')
data1=[ele.strip().split(',') for ele in open('testdata_1.txt')]
print("data1", data1)
df = DataFrame(data=[ele.strip().split(',') for ele in open('testdata_1.txt')], columns=columns)

data1 [['student1', '92', '77', '87', '77', '94'], ['student2', '74', '93', '88', '67', '85'], ['student3', '83', '96', '74', '79', '92'], ['student4', '100', '72', '83', '85', '66'], ['student5', '77', '96', '66', '79', '92'], ['student6', '100', '86', '84', '70', '71'], ['student7', '66', '91', '94', '97', '80'], ['student8', '97', '86', '75', '69', '88'], ['student9', '95', '98', '99', '85', '86'], ['student10', '78', '76', '73', '88', '86']]
data ListV2(['student1', 'student2', 'student3', 'student4', 'student5', 'student6', 'student7', 'student8', 'student9', 'student10'])
data ListV2(['92', '74', '83', '100', '77', '100', '66', '97', '95', '78'])
data ListV2(['77', '93', '96', '72', '96', '86', '91', '86', '98', '76'])
data ListV2(['87', '88', '74', '83', '66', '84', '94', '75', '99', '73'])
data ListV2(['77', '67', '79', '85', '79', '70', '97', '69', '85', '88'])
data ListV2(['94', '85', '92', '66', '92', '71', '80', '88', '86', '86'])


In [28]:
print(df)

,StudentName,E1,E2,E3,E4,E5
student1,student1,92,77,87,77,94
student2,student2,74,93,88,67,85
student3,student3,83,96,74,79,92
student4,student4,100,72,83,85,66
student5,student5,77,96,66,79,92
student6,student6,100,86,84,70,71
student7,student7,66,91,94,97,80
student8,student8,97,86,75,69,88
student9,student9,95,98,99,85,86
student10,student10,78,76,73,88,86

,StudentName,E1,E2,E3,E4,E5
student1,student1,92,77,87,77,94
student2,student2,74,93,88,67,85
student3,student3,83,96,74,79,92
student4,student4,100,72,83,85,66
student5,student5,77,96,66,79,92
student6,student6,100,86,84,70,71
student7,student7,66,91,94,97,80
student8,student8,97,86,75,69,88
student9,student9,95,98,99,85,86
student10,student10,78,76,73,88,86


In [54]:
expected_output = """,StudentName,E1,E2,E3,E4,E5
0,student1,92,77,87,77,94
1,student2,74,93,88,67,85
2,student3,83,96,74,79,92
3,student4,100,72,83,85,66
4,student5,77,96,66,79,92
5,student6,100,86,84,70,71
6,student7,66,91,94,97,80
7,student8,97,86,75,69,88
8,student9,95,98,99,85,86
9,student10,78,76,73,88,86"""

In [55]:
expected_output == df.__repr__()

True

In [60]:
type(df['StudentName'])

assignment.ListV2

In [57]:
expected_output = ['student1', 'student2', 'student3', 'student4', 'student5', 'student6', 'student7', 'student8', 'student9', 'student10']

In [58]:
expected_output == df['StudentName'].values

True

In [62]:
df[['StudentName', 'E1']]

,StudentName,E1
0,student1,92
1,student2,74
2,student3,83
3,student4,100
4,student5,77
5,student6,100
6,student7,66
7,student8,97
8,student9,95
9,student10,78

In [63]:
expected_output = """,StudentName,E1
0,student1,92
1,student2,74
2,student3,83
3,student4,100
4,student5,77
5,student6,100
6,student7,66
7,student8,97
8,student9,95
9,student10,78"""


In [64]:
expected_output == df[['StudentName', 'E1']].__repr__()

True

In [65]:
output = df[['StudentName', 'E1']]

In [66]:
output

,StudentName,E1
0,student1,92
1,student2,74
2,student3,83
3,student4,100
4,student5,77
5,student6,100
6,student7,66
7,student8,97
8,student9,95
9,student10,78

In [67]:
expected_output = ['student1', 'student2', 'student3', 'student4', 'student5', 'student6', 'student7', 'student8', 'student9', 'student10']
expected_output == list(output['StudentName'])

True

In [68]:
expected_output

['student1',
 'student2',
 'student3',
 'student4',
 'student5',
 'student6',
 'student7',
 'student8',
 'student9',
 'student10']

In [69]:
expected_output = ['92', '74', '83', '100', '77', '100', '66', '97', '95', '78']
expected_output == list(output['E1'])

True

In [70]:
expected_output = """,StudentName,E1,E2,E3,E4,E5
0,student2,74,93,88,67,85
1,student3,83,96,74,79,92
2,student4,100,72,83,85,66"""

expected_output == df[1:4].__repr__() # slice

True

In [20]:
expected_output = {'StudentName': ['student2', 'student3', 'student4'],
             'E1': ['74', '83', '100'],
             'E2': ['93', '96', '72'],
             'E3': ['88', '74', '83'],
             'E4': ['67', '79', '85'],
             'E5': ['85', '92', '66']}

output = df[1:4]
for key, value in expected_output.items():
    print(key, value == list(output[key]))
    

StudentName True
E1 True
E2 True
E3 True
E4 True
E5 True


In [72]:
df[1:4, :3]

,StudentName,E1,E2
0,student2,74,93
1,student3,83,96
2,student4,100,72

In [73]:
expected_output = """,StudentName,E1,E2
0,student2,74,93
1,student3,83,96
2,student4,100,72"""

expected_output == df[1:4, :3].__repr__() # two slices

True

In [74]:
for col in ['E1', 'E2', 'E3', 'E4','E5']:
    df.as_type(col, int)

In [75]:
expected_output = [92, 74, 83, 100, 77, 100, 66, 97, 95, 78]
expected_output == df['E1'].values

True

In [77]:
expected_output = [169, 167, 179, 172, 173, 186, 157, 183, 193, 154]
expected_output == (df['E1'] + df['E2']).values

True

In [78]:
expected_output = {'E1': 86.2, 'E2': 87.1, 'E3': 82.3, 'E4': 79.6, 'E5': 84.0}
expected_output == df[['E1', 'E2', 'E3', 'E4','E5']].mean()

True

In [79]:
df.drop('E5')

In [80]:
expected_output = ['StudentName', 'E1', 'E2', 'E3', 'E4']
expected_output == df.columns

True

In [81]:
expected_output = """,StudentName,E1,E2,E3,E4
0,student1,92,77,87,77
1,student2,74,93,88,67
2,student3,83,96,74,79
3,student4,100,72,83,85
4,student5,77,96,66,79
5,student6,100,86,84,70
6,student7,66,91,94,97
7,student8,97,86,75,69
8,student9,95,98,99,85
9,student10,78,76,73,88"""

expected_output == df.__repr__()

True

In [86]:
expected_output = df.loc(0)
expected_output == df.loc(0)
df.loc(0)

('student1', 92, 77, 87, 77)

In [87]:
df.set_index(list(df['StudentName']))
df.drop('StudentName')

In [88]:
df

,E1,E2,E3,E4
student1,92,77,87,77
student2,74,93,88,67
student3,83,96,74,79
student4,100,72,83,85
student5,77,96,66,79
student6,100,86,84,70
student7,66,91,94,97
student8,97,86,75,69
student9,95,98,99,85
student10,78,76,73,88

In [89]:
expected_output = """,E1,E2,E3,E4
student1,92,77,87,77
student2,74,93,88,67
student3,83,96,74,79
student4,100,72,83,85
student5,77,96,66,79
student6,100,86,84,70
student7,66,91,94,97
student8,97,86,75,69
student9,95,98,99,85
student10,78,76,73,88"""

expected_output == df.__repr__()

True

In [90]:
df.loc('student1')

(92, 77, 87, 77)

In [91]:
expected_output = """,E1,E2
student1,92,77
student2,74,93"""

expected_output == df.loc((['student1', 'student2'], ['E1', 'E2'])).__repr__()

True

In [92]:
expected_output = {'E1': [92, 74], 'E2': [77, 93]}

output = df.loc((['student1', 'student2'], ['E1', 'E2']))
for key, value in expected_output.items():
    print(key, value == list(output[key]))



E1 True
E2 True


In [93]:
expected_output = ['E1', 'E2']

expected_output == output.columns

True

In [95]:
expected_output = {'student1': 0, 'student2': 1} 
expected_output == output.index

True

In [96]:
output.iterrows()

[('student1', (92, 77)), ('student2', (74, 93))]

In [98]:
expected_output = [('student1', (92, 77)), ('student2', (74, 93))]
expected_output == output.iterrows()

True

In [100]:
expected_output = {'E1': [92, 74], 'E2': [77, 93]}
expected_output == dict(output.iteritems())

True

In [101]:
expected_output = [('student1', (92, 77, 87, 77)),
 ('student2', (74, 93, 88, 67)),
 ('student3', (83, 96, 74, 79)),
 ('student4', (100, 72, 83, 85)),
 ('student5', (77, 96, 66, 79)),
 ('student6', (100, 86, 84, 70)),
 ('student7', (66, 91, 94, 97)),
 ('student8', (97, 86, 75, 69)),
 ('student9', (95, 98, 99, 85)),
 ('student10', (78, 76, 73, 88))]

expected_output == df.iterrows()

True

In [102]:
df.iteritems()

dict_items([('E1', [92, 74, 83, 100, 77, 100, 66, 97, 95, 78]), ('E2', [77, 93, 96, 72, 96, 86, 91, 86, 98, 76]), ('E3', [87, 88, 74, 83, 66, 84, 94, 75, 99, 73]), ('E4', [77, 67, 79, 85, 79, 70, 97, 69, 85, 88])])

In [103]:
expected_output = {'E1': [92, 74, 83, 100, 77, 100, 66, 97, 95, 78],
 'E2': [77, 93, 96, 72, 96, 86, 91, 86, 98, 76],
 'E3': [87, 88, 74, 83, 66, 84, 94, 75, 99, 73],
 'E4': [77, 67, 79, 85, 79, 70, 97, 69, 85, 88]}
expected_output == dict(df.iteritems()) # column wise

True

In [104]:
columns = ('StudentName', 'E1', 'E2', 'E3', 'E4', 'E5')
df = DataFrame(data=[ele.strip().split(',') for ele in open('testdata_1.txt')], columns=columns)

for col in ['E1', 'E2', 'E3', 'E4', 'E5']:
    df.as_type(col, int)

df.drop('E5')
df.set_index(list(df['StudentName']))
df.drop('StudentName')
output = df.loc((['student1', 'student2', 'student3'], ['E1']))

In [105]:
expected_output = [('student1', (92,)), ('student2', (74,)), ('student3', (83,))]
expected_output == output.iterrows()

True