In [None]:
import json
import linecache
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from csv_convertor import Reader


# Step 2: Consuming the data

## Approach 1: Create a New Reader-object for Each Consumer (Initializer Dependency)

In this approach, each instance of AverageYear and AverageMonth creates its own Reader object when being initialized.

### Pros:

**Isolation:** Each consumer has its own dedicated Reader instance, ensuring isolation and independence in data processing.

**Flexibility:** Consumers can have different configurations or parameters for reading data if needed.


### Cons:

**Resource Duplication:** If multiple consumers are created, each instance of Reader may load the same data multiple times, potentially consuming more memory.

**Resource Inefficiency:** The same data may be loaded and processed multiple times, which is inefficient when the data is large.

**Increased Complexity:** Managing multiple Reader instances and coordinating their behavior might lead to more complex code.


## Approach 2: Share a Single Reader-object Among Consumers (External Dependency)

In this approach, a single Reader instance is created and shared among instances of AverageYear and AverageMonth.


### Pros:

**Resource Efficiency:** Only one instance of Reader loads and processes data, reducing memory usage.

**Consistency:** All consumers use the same data source, ensuring consistency in data processing.

**Simplicity:** Managing a single Reader instance reduces complexity compared to creating multiple instances.


### Cons:

**Limited Flexibility:** All consumers share the same Reader configuration and data source, which might not be suitable if consumers have different requirements.

-----

In [None]:
class AverageYear:
    '''Collect (year, annual mean) pairs from a Reader and plot them.

    Each instance builds its own Reader from the given configuration path.
    '''
    def __init__(self, config_path='config.yaml'):
        self.reader = Reader(config_path)
        # Filled by calculate_average() with (year, annual_mean) tuples.
        self.temperatures = []

    def calculate_average(self):
        '''Drain the reader and store one (year, 'J-D' value) pair per entry.

        Every truthy result of get_lines() is parsed as a JSON document and
        iterated entry by entry; the first falsy result ends the loop.
        '''
        batch = self.reader.get_lines()
        while batch:
            self.temperatures.extend(
                (int(record['Year']), float(record['J-D']))
                for record in json.loads(batch)
            )
            batch = self.reader.get_lines()

    def plot_average(self):
        '''Plot the collected values against their years; no-op when empty.'''
        if self.temperatures:
            year_axis, anomaly_axis = zip(*self.temperatures)
            plt.plot(year_axis, anomaly_axis)
            plt.xlabel('Year')
            plt.ylabel('Average Temperature Anomaly')
            plt.title('Average Yearly Temperature Anomaly')
            plt.show()

# Create an instance of AverageYear.
# The path is a raw string: '\z', '\p' and '\c' are not valid escape sequences,
# so a normal string literal only works by accident and triggers a warning on
# recent Python versions. The resulting path value is unchanged.
average_year = AverageYear(r'C:\zshahpouri\programming 2\config.yml')

# Calculate the average temperatures
average_year.calculate_average()

# Plot the average yearly temperature anomaly
average_year.plot_average()


In [None]:
class AverageMonth:
    '''Collect the twelve monthly values per year from a Reader and plot them.

    Each instance builds its own Reader from the given configuration path.
    '''
    def __init__(self, config_path='config.yaml'):
        self.reader = Reader(config_path)
        # Filled by calculate_average() with (year, [12 monthly values]) tuples.
        self.temperatures = []
        # Keys looked up in every entry, in calendar order.
        self.month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def calculate_average(self):
        '''Drain the reader and store one (year, monthly values) pair per entry.

        Every truthy result of get_lines() is parsed as a JSON document and
        iterated entry by entry; the first falsy result ends the loop.
        '''
        batch = self.reader.get_lines()
        while batch:
            for record in json.loads(batch):
                year = int(record['Year'])
                per_month = [float(record[name]) for name in self.month_names]
                self.temperatures.append((year, per_month))
            batch = self.reader.get_lines()

    def plot_telorance(self):
        '''Plot one line per month across all collected years; no-op when empty.

        The method name is kept as spelled because callers use it.
        '''
        if self.temperatures:
            year_axis, monthly_rows = zip(*self.temperatures)
            for column, name in enumerate(self.month_names):
                # Pick this month's value out of every year's row.
                series = [row[column] for row in monthly_rows]
                plt.plot(year_axis, series, label=name)
            plt.xlabel('Year')
            plt.ylabel('Average Temperature Anomaly')
            plt.title('Average Monthly Temperature Anomaly')
            plt.legend()
            plt.show()

    def plot_average_per_month(self):
        '''Plot, per month, the mean over all collected years; no-op when empty.'''
        if self.temperatures:
            _, monthly_rows = zip(*self.temperatures)
            # zip(*rows) regroups the per-year rows into one tuple per month.
            month_means = [sum(values) / len(values) for values in zip(*monthly_rows)]
            plt.plot(self.month_names, month_means, marker='o')
            plt.xlabel('Month')
            plt.ylabel('Average Temperature Anomaly')
            plt.title('Average Temperature Anomaly per Month')
            plt.show()


# Create an instance of AverageMonth (it builds its own Reader internally).
# The path is a raw string: '\z', '\p' and '\c' are not valid escape sequences,
# so a normal string literal only works by accident and triggers a warning on
# recent Python versions. The resulting path value is unchanged.
average_month = AverageMonth(r'C:\zshahpouri\programming 2\config.yml')


# Average temperatures
average_month.calculate_average()

# Plots
average_month.plot_telorance()
average_month.plot_average_per_month()


In [None]:
# There's no import of pandas in the cells above...
class AverageYear:
    '''Pull batches from a shared reader and average each row's numeric columns.'''
    def __init__(self, reader):
        self.reader = reader
        # Grows by one block of rows per batch read in calculate_avg().
        self.dataframe = pd.DataFrame()

    def calculate_avg(self):
        '''Drain the reader, add an 'Average' column per batch, then print it.

        Each batch is a sequence of JSON strings, one per row. Rows are indexed
        by 'Year'; values that cannot be parsed as numbers become NaN before
        the row-wise mean is taken.
        '''
        # Reviewer remark kept from the original:
        # Interesting; how can this work. Both your AverageYear and your AverageMonth
        # hang the main thread. Also, since they both propagate the state of the
        # Reader-objects, there will be a mismatch between the expected and received
        # data.
        batch = self.reader.get_lines()
        while batch:
            frame = pd.DataFrame([json.loads(raw) for raw in batch])
            frame = frame.set_index('Year')
            # Non-numeric cells are coerced to NaN so they drop out of the mean.
            frame = frame.apply(pd.to_numeric, errors='coerce')
            frame['Average'] = frame.mean(axis=1, numeric_only=True)

            # pd.concat replaces the deprecated DataFrame.append.
            self.dataframe = pd.concat([self.dataframe, frame])
            batch = self.reader.get_lines()

        print(self.dataframe['Average'])


class AverageMonth:
    '''Pull batches from a shared reader and average each column over the batch.'''
    def __init__(self, reader):
        self.reader = reader
        # Grows by one transposed block (one row per column name) per batch.
        self.dataframe = pd.DataFrame()

    def calculate_avg(self):
        '''Drain the reader, add a per-batch 'Average' column, then print it.

        Each batch is a sequence of JSON strings, one per row. After indexing
        by 'Year' and coercing to numbers, the batch is transposed so that the
        original columns become rows, and the mean is taken across the years
        of that batch only.
        '''
        batch = self.reader.get_lines()
        while batch:
            frame = pd.DataFrame([json.loads(raw) for raw in batch])
            frame = frame.set_index('Year')
            # Non-numeric cells are coerced to NaN so they drop out of the mean.
            frame = frame.apply(pd.to_numeric, errors='coerce')

            # Years become columns, the former column names become the index.
            by_column = frame.T
            by_column['Average'] = by_column.mean(axis=1, numeric_only=True)

            self.dataframe = pd.concat([self.dataframe, by_column])
            batch = self.reader.get_lines()

        print(self.dataframe['Average'])



In [None]:

# Pull-based run: a fresh Reader is created for each consumer, so each one
# starts from its own reader instance rather than sharing one.
reader = Reader('dSST.csv', stride=5)  # stride is provided here
average_year = AverageYear(reader)
# Blocks until the reader returns a falsy batch, then prints the averages.
average_year.calculate_avg()

# Rebinds `reader` to a second, independent Reader for the monthly consumer.
reader = Reader('dSST.csv', stride=5)  # stride is provided here
average_month = AverageMonth(reader)
average_month.calculate_avg()



In [None]:
import pandas as pd


### Extending the reader

The Reader class is extended with methods for adding, removing, and notifying observers.

To do this, we edit the `__init__` method:

there we create an empty list to hold the observers.

We also update the `get_lines` method so that it notifies the observers whenever it has read new lines.

In [None]:
import time

class Reader:
    '''Read a CSV file in batches of `stride` lines and push each batch to observers.

    NOTE(review): CsvConverter is not imported in the visible part of this
    file; it must be in scope before a Reader can be constructed. The
    converter is stored but never applied here, so observers receive the raw
    CSV text of each line - confirm whether it should be converted to JSON
    before being handed out.
    '''
    def __init__(self, csv_file, stride, delay=5):
        '''Set up the reader.

        csv_file: path of the CSV file to read.
        stride: maximum number of lines handed out per get_lines() call.
        delay: seconds to pause at the end of every get_lines() call
            (default 5, the previously hard-coded value).
        '''
        self.csv_file = csv_file
        self.stride = stride
        self.delay = delay
        self.current_line = 2  # Start from line 2, assuming line 1 is the header
        self.converter = CsvConverter(linecache.getline(csv_file, 1).strip())
        self.observers = []

    def add_observer(self, observer):
        '''This method adds an observer to the list of observers (no duplicates).'''
        if observer not in self.observers:
            self.observers.append(observer)

    def remove_observer(self, observer):
        '''This method removes an observer from the list of observers, if present.'''
        if observer in self.observers:
            self.observers.remove(observer)

    def notify_observers(self, lines):
        '''This method passes `lines` to the update() method of every observer.'''
        # Iterate over a snapshot so an observer may add or remove observers
        # from inside update() without disturbing this loop.
        for observer in tuple(self.observers):
            observer.update(lines)

    def get_lines(self):
        '''Read up to `stride` lines, notify the observers, pause, and return the lines.

        Returns the list of stripped lines read by this call; the list is
        empty once the end of the file is reached, so callers can stop on a
        falsy result. Observers are only notified for non-empty batches. The
        pause happens on every call, including at the end of the file.
        '''
        lines = []
        for _ in range(self.stride):
            # linecache.getline returns '' when the line does not exist.
            line = linecache.getline(self.csv_file, self.current_line)
            if not line:
                break
            lines.append(line.strip())
            self.current_line += 1
        if lines:
            self.notify_observers(lines)
        time.sleep(self.delay)
        return lines


### Extending the consumers

The AverageYear and AverageMonth classes are updated to include an `update` method.
This method takes the lines as input and performs the same processing as before.

In [None]:
class AverageYear:
    '''This class calculates the average temperature anomaly per year.'''
    def __init__(self, reader):
        self.reader = reader
        # Grows by one block of rows per update() call.
        self.dataframe = pd.DataFrame()

    def update(self, lines):
        '''Average the first twelve data columns of each row in `lines` and print the result.

        lines: sequence of JSON strings, one object per row, each with a
            'Year' key that becomes the index. The first twelve remaining
            columns are presumably the months Jan..Dec - confirm against the
            data source.

        Values that cannot be parsed as numbers are treated as NaN and left
        out of the mean. An empty batch is ignored.
        '''
        if not lines:
            # Nothing to index or average; avoids a KeyError on 'Year'.
            return
        df = pd.DataFrame([json.loads(line) for line in lines])
        df.set_index('Year', inplace=True)
        # Coerce only the averaged columns; the stored columns stay as received.
        monthly = df.iloc[:, 0:12].apply(pd.to_numeric, errors='coerce')
        df['Average'] = monthly.mean(axis=1)
        self.dataframe = pd.concat([self.dataframe, df])
        print(self.dataframe['Average'])

class AverageMonth:
    ''' This class calculates the average temperature anomaly per month.'''
    def __init__(self, reader):
        self.reader = reader
        # Grows by one transposed block per update() call.
        self.dataframe = pd.DataFrame()

    def update(self, lines):
        '''Average each of the first twelve data columns over the rows in `lines` and print the result.

        lines: sequence of JSON strings, one object per row, each with a
            'Year' key that becomes the index. The first twelve remaining
            columns are presumably the months Jan..Dec - confirm against the
            data source.

        The batch is transposed so the twelve columns become rows, and the
        mean is taken across the years of this batch only. Values that cannot
        be parsed as numbers are treated as NaN and left out of the mean. An
        empty batch is ignored.
        '''
        if not lines:
            # Nothing to index or average; avoids a KeyError on 'Year'.
            return
        df = pd.DataFrame([json.loads(line) for line in lines])
        df.set_index('Year', inplace=True)
        # Coerce before transposing so one bad cell cannot break the mean.
        df = df.iloc[:, 0:12].apply(pd.to_numeric, errors='coerce').transpose()
        df['Average'] = df.mean(axis=1)
        self.dataframe = pd.concat([self.dataframe, df])
        print(self.dataframe['Average'])


In [None]:
# Observer wiring: one Reader is shared by both consumers.
reader = Reader('dSST.csv', stride=5)
average_year = AverageYear(reader)
average_month = AverageMonth(reader)
# Registration order is the order in which the observers are notified.
reader.add_observer(average_year)
reader.add_observer(average_month)
# Note: nothing in this cell calls reader.get_lines(), so no batch is read
# and the observers print nothing until get_lines() is invoked.

With this setup, every time Reader.get_lines retrieves new lines, it notifies the observers, which update their calculations and print the new averages.

Good; however, I would have liked to see some output here as well 😎.