### Zadanie dla chetnych

* Stworz klase DataProcess która będzie:
   * Przetwarzać plik /var/log/syslog z logami,
   * Dodatkowo można użyć wyrażeń regularnych,
   * Stworzy statystyki: Ile było errorów w zakresie czasu
       * Dodatkowo można stworzyć metodę do wizualizacji (Histogram)
       * Pliki z wizualizacji zapisuj na dysku

* Klasa ta jest obiektem w sqlAlchemy(Orm):
   * Czyli Zapisujemy wyniki przetwarzania do bazy danych,

* Stworzcie zapytanie sql ktore bedzie wyswietlac wyniki przetwarzania

** Użyjcie flaska - stwórzcie endpoint do wyświetlania listy statystyk

In [1]:
%matplotlib inline

import re
import os
import calendar
from datetime import datetime, timezone
import matplotlib.pyplot as plt
import numpy as np

In [2]:
print(os.getcwd())

/media/sf_SharedFolderDS/code/bootcamp_data_science/Week_02


In [75]:
class DataProcess:
    """TODO: Descritpion"""
    @staticmethod
    def get_file_contents(fpath="../../../../../var/log/syslog"):
        """Returns data contents. Defaults to syslog"""
        return open(fpath)
        
    @staticmethod
    def print_file_contents(file=""):
        """Prints out file contents. Default value to syslog"""
        if len(file) < 1:
            file = "../../../../../var/log/syslog"
        fh = DataProcess.get_file_contents(file)
        for line in fh:
            print(line)
    @staticmethod
    def print_lines_with(pattern=r"error"):
        """Prints out lines with errors. Default value to error"""
        fh = DataProcess.get_file_contents()
        for line in fh:
            if not re.findall(pattern, line, re.IGNORECASE): continue
            print(line)
    @staticmethod        
    def count_errors():
        fh = DataProcess.get_file_contents()
        errors_cnt = 0
        pattern=r"error"
        for line in fh:
            if not re.findall(pattern, line, re.IGNORECASE): continue
            errors_cnt += 1
        err_arr = DataProcess.get_error_data()
        time_delta = err_arr[-1][0] - err_arr[0][0]
        return errors_cnt, time_delta
    @staticmethod        
    def get_error_data():
        """Returns error array with time, name and part of description"""
        fh = DataProcess.get_file_contents()
        months_dict = dict((v,k) for k,v in enumerate(calendar.month_abbr))
        pattern=r"error"
        error_array = []
        for line in fh:
            if not re.findall(pattern, line, re.IGNORECASE): continue
            err_type = line.split(':')[5]
            err_name = line.split(':')[6]
            month = line.split(' ')[0]
            day = line.split(' ')[1]
            time = line.split(' ')[2]
            t = datetime(2018, months_dict[month], int(day), int(time.split(":")[0]), int(time.split(":")[1]), int(time.split(":")[2]), 345, tzinfo=timezone.utc)
            error_array.append([t, err_type, err_name])
        
        return error_array
    @staticmethod
    def plot_errors():
        err_arr = DataProcess.get_error_data() 
        dates = np.array([x[0] for x in err_arr])
        values = np.array([1 for x in err_arr])
        plt.plot_date(dates, values)
        plt.title('Error Log')
        plt.xlabel("Date")
        plt.ylabel("Error")
        plt.savefig("Error_Log.png")
        plt.show()

In [76]:
DataProcess.get_error_data()

[[datetime.datetime(2018, 9, 22, 9, 26, 35, 345, tzinfo=datetime.timezone.utc),
  ' FuMain'],
 [datetime.datetime(2018, 9, 22, 9, 26, 59, 345, tzinfo=datetime.timezone.utc),
  ' failed to call gs_plugin_add_updates on fwupd'],
 [datetime.datetime(2018, 9, 22, 9, 27, 0, 345, tzinfo=datetime.timezone.utc),
  ' FuMain'],
 [datetime.datetime(2018, 9, 22, 9, 30, 52, 345, tzinfo=datetime.timezone.utc),
  ' zeitgeist-datahub.vala'],
 [datetime.datetime(2018, 9, 22, 10, 23, 32, 345, tzinfo=datetime.timezone.utc),
  ' zeitgeist-datahub.vala'],
 [datetime.datetime(2018, 9, 22, 14, 10, 43, 345, tzinfo=datetime.timezone.utc),
  ' zeitgeist-datahub.vala']]

## Create DB to store errors

Implementation with https://www.pythoncentral.io/introductory-tutorial-python-sqlalchemy/

In [117]:
import os
import sys
from sqlalchemy import Column, ForeignKey, String, DateTime, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
 
Base = declarative_base()

In [118]:
class ErrorLog(Base):
    __tablename__ = 'errorlog'
    id = Column(Integer, primary_key=True)
    error_type = Column(String(250))
    error_name = Column(String(250))
    error_date = Column(DateTime, default=datetime.utcnow)

In [119]:
engine = create_engine('sqlite:///error_log.db')

In [120]:
Base.metadata.create_all(engine)

In [121]:
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)

In [122]:
session = DBSession()

In [123]:
new_error = ErrorLog(error_name='New error')
session.add(new_error)
session.commit()

In [124]:
session.query(ErrorLog).all()

[<__main__.ErrorLog at 0x7f463b0b6c88>, <__main__.ErrorLog at 0x7f463b0f7908>]

In [125]:
some_error = session.query(ErrorLog).first()

In [126]:
print(some_error)

<__main__.ErrorLog object at 0x7f463b0b6c88>


In [127]:
print(some_error.error_name)

New error


In [129]:
session.query(ErrorLog).filter_by(error_name='New error').delete()

2

## Persist all errors in DB

In [160]:
for record in DataProcess.get_error_data():
    new_error = ErrorLog(error_name=record[2], error_type=record[1], error_date=record[0])
    session.add(new_error)
session.commit()

In [161]:
# session.query(ErrorLog).delete()

In [166]:
for err in session.query(ErrorLog).all():
    print(f'{err.error_date}: {err.error_type} - {err.error_name}')

