# Setup

In [None]:
# pandas is imported by the cells below, so install it with the other packages.
pip install requests pandas sqlalchemy psycopg2

In [None]:
import requests
import pandas as pd
from datetime import datetime
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, Date, DateTime
from sqlalchemy.orm import sessionmaker

# Functions

In [None]:
def fetch_covid_data(timeout=30):
    """Download the incidence history of every state as a DataFrame.

    Args:
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A DataFrame with one row per state and history entry and the
        columns ``state`` (the key used in the API response), ``date``
        (the date string exactly as delivered) and ``incidence`` (the
        entry's ``weekIncidence`` value). The three columns are present
        even when the response contains no records.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    url = "https://api.corona-zahlen.org/states/history/incidence"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error body.
    response.raise_for_status()
    payload = response.json()

    # Flatten {state: {'history': [...]}} into one record per state and day.
    records = [
        {
            'state': state,
            'date': entry['date'],
            'incidence': entry['weekIncidence'],
        }
        for state, details in payload['data'].items()
        for entry in details['history']
    ]

    # Naming the columns keeps the frame's layout stable for empty results.
    return pd.DataFrame(records, columns=['state', 'date', 'incidence'])

def create_database_connection(database_uri=None):
    """Create the SQLAlchemy engine for the target database.

    Args:
        database_uri: Connection string to use. When omitted, the value of
            the ``DATABASE_URI`` environment variable is used; if that is
            unset or empty, the placeholder string below is used.

    Returns:
        The engine returned by ``create_engine`` for the chosen URI.
    """
    # Local import so this function does not depend on the file's import cell.
    import os

    if database_uri is None:
        # Prefer the environment so real credentials stay out of the source;
        # the literal is only a placeholder to be replaced.
        database_uri = (
            os.environ.get('DATABASE_URI')
            or 'postgresql+psycopg2://user:password@localhost/dbname'
        )
    return create_engine(database_uri)

def _incidence_table(metadata):
    """Declare the ``covid_incidence`` table on *metadata* and return it.

    Shared by create_table() and insert_data() so both work against the
    same column definitions.
    """
    return Table(
        'covid_incidence', metadata,
        Column('id', Integer, primary_key=True, autoincrement=True),
        Column('state', String, nullable=False),
        Column('date', Date, nullable=False),
        Column('incidence', Float, nullable=False),
        # Versioning columns: insert_data() marks the latest value of a
        # (state, date) pair with is_current='Y' and closes replaced rows
        # by setting end_date and is_current='N'.
        Column('start_date', DateTime, default=datetime.utcnow),
        Column('end_date', DateTime, nullable=True),
        Column('is_current', String(1), default='Y'),
    )


def _as_dates(values):
    """Convert date strings or timestamps to plain ``datetime.date`` objects."""
    return pd.to_datetime(values, utc=True).dt.date


def create_table(engine):
    """Create the ``covid_incidence`` table on *engine* if it is missing.

    Args:
        engine: SQLAlchemy engine of the target database.
    """
    metadata = MetaData()
    _incidence_table(metadata)
    metadata.create_all(engine)


def insert_data(engine, df):
    """Store *df* in ``covid_incidence``, keeping a history of changed values.

    For every (state, date) pair in *df*:

    * no current row exists: a new current row is inserted;
    * a current row exists with the same incidence: nothing happens;
    * a current row exists with a different incidence: that row gets an
      ``end_date`` and ``is_current='N'``, and a new current row is inserted.

    All reads and writes run in one transaction that is committed on
    success and rolled back if anything raises.

    Args:
        engine: SQLAlchemy engine of the target database; the table must
            already exist (see create_table()).
        df: DataFrame with the columns ``state``, ``date`` and
            ``incidence``. ``date`` may hold ISO strings, timestamps or
            dates; values are reduced to calendar dates before comparing.
    """
    if df.empty:
        return

    table = _incidence_table(MetaData())
    # One timestamp per run, so a closed row and its successor line up.
    now = datetime.utcnow()

    # Stored dates and API date strings only compare equal once both sides
    # are plain date objects.
    incoming = df.assign(date=_as_dates(df['date']))
    # Keep a single (the last) value per key so one run cannot create two
    # current rows for the same state and date.
    incoming = incoming.drop_duplicates(subset=['state', 'date'], keep='last')

    # engine.begin() commits on success, rolls back on error and always
    # returns the connection to the pool.
    with engine.begin() as connection:
        existing = pd.read_sql_table('covid_incidence', connection)
        current = existing[existing['is_current'] == 'Y']

        # Index the current rows once instead of scanning the whole frame
        # for every incoming row. tolist() yields native Python values,
        # which database drivers accept (numpy integers are not always
        # adaptable).
        current_by_key = {}
        for row_id, state, day, incidence in zip(
            current['id'].tolist(),
            current['state'].tolist(),
            _as_dates(current['date']).tolist(),
            current['incidence'].tolist(),
        ):
            # setdefault keeps the first row if duplicates already exist.
            current_by_key.setdefault((state, day), (row_id, incidence))

        superseded_ids = []
        new_rows = []
        for state, day, incidence in zip(
            incoming['state'].tolist(),
            incoming['date'].tolist(),
            incoming['incidence'].tolist(),
        ):
            known = current_by_key.get((state, day))
            if known is not None:
                row_id, stored_incidence = known
                if stored_incidence == incidence:
                    # Unchanged value: no new version needed.
                    continue
                superseded_ids.append(row_id)
            new_rows.append({
                'state': state,
                'date': day,
                'incidence': incidence,
                'start_date': now,
                'is_current': 'Y',
            })

        if superseded_ids:
            # Close the replaced versions before inserting their successors.
            connection.execute(
                table.update()
                .where(table.c.id.in_(superseded_ids))
                .values(end_date=now, is_current='N')
            )
        if new_rows:
            connection.execute(table.insert(), new_rows)

def main():
    """Run one complete load.

    Opens the database connection, makes sure the table exists, downloads
    the incidence data and writes it to the table, in that order.
    """
    db_engine = create_database_connection()
    create_table(db_engine)
    insert_data(db_engine, fetch_covid_data())

# Main

In [None]:
# Run the load only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

# Tests

In [1]:
import unittest
from unittest.mock import patch, MagicMock
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.exc import OperationalError
import requests
from io import StringIO
from datetime import datetime
import covid_data_script

class TestCovidDataFunctions(unittest.TestCase):
    """Tests for the loader functions in ``covid_data_script``.

    Database behaviour is checked against an in-memory SQLite engine, so
    the tests assert on the rows actually stored instead of on which mock
    methods were called. Patching ``sqlalchemy.create_engine`` or
    ``sqlalchemy.orm.sessionmaker`` has no effect on the module under
    test, because it binds those names with ``from ... import`` when it is
    imported.
    """

    @staticmethod
    def _memory_engine():
        """Return a fresh in-memory SQLite engine with the table created."""
        engine = create_engine('sqlite://')
        covid_data_script.create_table(engine)
        return engine

    @patch('requests.get')
    def test_fetch_covid_data(self, mock_get):
        """The API payload is flattened into state/date/incidence rows."""
        mock_get.return_value.json.return_value = {
            'data': {
                'DE-BW': {
                    'history': [
                        {'date': '2023-05-01T00:00:00Z', 'weekIncidence': 50.0},
                        {'date': '2023-05-02T00:00:00Z', 'weekIncidence': 55.0},
                    ]
                }
            }
        }

        expected_df = pd.DataFrame({
            'state': ['DE-BW', 'DE-BW'],
            'date': ['2023-05-01T00:00:00Z', '2023-05-02T00:00:00Z'],
            'incidence': [50.0, 55.0],
        })

        result_df = covid_data_script.fetch_covid_data()

        mock_get.assert_called_once()
        pd.testing.assert_frame_equal(result_df, expected_df)

    def test_create_database_connection(self):
        """An engine object is returned (no connection is opened here)."""
        engine = covid_data_script.create_database_connection()
        self.assertIsNotNone(engine)

    def test_create_table(self):
        """The table is created with the expected columns, idempotently."""
        engine = self._memory_engine()
        # A second call must be harmless when the table already exists.
        covid_data_script.create_table(engine)

        stored = pd.read_sql_table('covid_incidence', engine)

        self.assertTrue(stored.empty)
        self.assertEqual(
            set(stored.columns),
            {'id', 'state', 'date', 'incidence',
             'start_date', 'end_date', 'is_current'},
        )

    def test_insert_data(self):
        """Unchanged values are skipped; changed values create a new version."""
        engine = self._memory_engine()
        first_load = pd.DataFrame({
            'state': ['DE-BW'],
            'date': ['2023-05-01T00:00:00Z'],
            'incidence': [50.0],
        })

        covid_data_script.insert_data(engine, first_load)
        # Loading identical data again must not add a row.
        covid_data_script.insert_data(engine, first_load)
        # A changed value closes the old row and adds a new current one.
        covid_data_script.insert_data(
            engine, first_load.assign(incidence=[55.0])
        )

        rows = pd.read_sql_table('covid_incidence', engine).sort_values('id')

        self.assertEqual(rows['incidence'].tolist(), [50.0, 55.0])
        self.assertEqual(rows['is_current'].tolist(), ['N', 'Y'])
        self.assertTrue(pd.notna(rows['end_date'].iloc[0]))
        self.assertTrue(pd.isna(rows['end_date'].iloc[1]))

# Run the test suite when this code is executed directly.
# NOTE(review): unittest.main() reads sys.argv and exits the interpreter by
# default; if this cell is run inside a notebook kernel, confirm whether
# unittest.main(argv=[''], exit=False) is what is wanted instead.
if __name__ == "__main__":
    unittest.main()
    

1

# Backup

In [None]:
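# Scheduling on Linux (bash): open the crontab with
#   crontab -e
# and add the following line to run the script every day at midnight:
# 0 0 * * * /usr/bin/python3 /path/to/your_script.py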