Author: Mykyta Minenko\
Note: The following code was tested on Linux (specifically Ubuntu 20.04) and may not work on other operating systems or distributions.

# Imports

In [19]:
#task1
import numpy as np
import pandas as pd
import string
import re
from sqlalchemy_utils import database_exists, create_database
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pymongo import MongoClient

#task2
import urllib.request
import base64
from PIL import Image
from io import BytesIO
import hashlib

#task3
import asyncio
import subprocess

# Task 1

In [82]:
class Task1:
    """
    Class for storing Task1 functions.

    All subtasks are static methods: A generates a random .csv file,
    B post-processes a .csv file, C loads a .csv file into SQLite and
    D loads a .csv file into MongoDB.

    """
    def __init__(self):
        # NOTE(review): the usage text printed below passes 'my_file1' to
        # subtask_A and then reads 'my_file1.csv'; subtask_A writes exactly
        # the name it is given, so pass the extension explicitly.
        print("Successfuly initialized task 1!\n")
        print("Example of usage:\n\
            task1 = Task1()\n\
            task1.subtask_A('my_file1')\n\
            time.sleep(2) # to let subtask_A finish saving file\n\
            task1.subtask_B('my_file1.csv', 'my_file2.csv')\n\
            task1.subtask_C('my_file.csv', 'mysqldb')\n\
            task1.subtask_D('my_file.csv', 'mymongodb')")

    @staticmethod
    def subtask_A(file_name, num_records=1024, num_columns=6, len_items=8, random_seed=None, string_characters=None):
        """
        Generates a .csv file with selected number of records and columns and fixed length of items.

        Parameters
        ----------
        file_name: str
            The desired name of a file with file extension.
            The name is used as given, no extension is appended.
        num_records : int, optional
            The number of records to generate. Defaults to 1024.
        num_columns : int, optional
            The number of columns to generate. Defaults to 6.
        len_items : int, optional
            The length of items in each column. Defaults to 8.
        random_seed : int, optional
            Random seed value in order to get reproducible results.
            Defaults to None, in which case numpy's global random state
            is used without re-seeding.
        string_characters: str, optional
            The string of values from which items are generated.
            By default consists of numbers and latin letters.

        Returns
        -------
        out : str
            Name of the created file (identical to ``file_name``).

        Raises
        ------
        ValueError
            If ``string_characters`` is not a str or is an empty string.

        Example
        --------
        >>> subtask_A("my_file.csv", num_columns=10, random_seed=42)
        'my_file.csv'

        """

        if string_characters is None:
            string_characters = string.ascii_letters + string.digits
        elif not isinstance(string_characters, str):
            raise ValueError("Parameter string_characters should be of type str")
        elif not string_characters:
            raise ValueError("Parameter string_characters should not be empty string")

        if random_seed is not None:
            np.random.seed(random_seed)

        # the alphabet does not change between cells, so build it only once
        alphabet = list(string_characters)

        # one np.random.choice call per cell keeps the random stream (and
        # therefore seeded output) stable
        content = [[''.join(np.random.choice(alphabet, len_items, replace=True))
                    for _ in range(num_columns)]
                   for _ in range(num_records)]
        df = pd.DataFrame(content)

        # save to .csv
        df.to_csv(file_name, index=False)

        return file_name

    @staticmethod
    def subtask_B(file_to_modify, new_file_name):
        """
        Modifies a selected .csv so that odd digits are replaced with #.
        Also deletes entries where any element of the columns begins with a vowel.

        Parameters
        ----------
        file_to_modify: str
            The name of a file to modify with file extension.
        new_file_name: str
            The name of a new file which will be created with file extension.

        Returns
        -------
        out : str
            Name of the created file (identical to ``new_file_name``).

        Example
        --------
        >>> subtask_B("my_file.csv", "modified_file.csv")
        'modified_file.csv'

        """

        # Read every cell as text: type inference would turn all-digit columns
        # into integers and cells such as "NA" into NaN, and neither can be
        # processed with string operations.
        df = pd.read_csv(file_to_modify, dtype=str, keep_default_na=False)

        # rows to keep: no cell of the row starts with a vowel
        keep = pd.Series(True, index=df.index)
        for col in df.columns:
            # replace 13579 with "#"
            df[col] = df[col].str.replace(r"[13579]", "#", regex=True)
            # str.match anchors the pattern at the beginning of the cell
            keep &= ~df[col].str.match(r"[aeiouAEIOU]", na=False)

        df = df[keep].reset_index(drop=True)

        # save to .csv
        df.to_csv(new_file_name, index=False)

        return new_file_name

    @staticmethod
    def subtask_C(file_name, db_name):
        """
        Reads .csv file and saves it in an SQLite database.
        Deletes records where in 2nd column first element is a number.

        The data is stored in the table ``CustomTable`` of ``<db_name>.db``;
        each .csv column ``X`` becomes a text column ``col_X``.

        Parameters
        ----------
        file_name: str
            The name of a file for reading with file extension.
        db_name: str
            The name of a database which will be created without extension.

        Returns
        -------
        out : None

        Raises
        ------
        ValueError
            If the .csv file has fewer than 2 columns.

        Example
        --------
        >>> subtask_C("my_file.csv", "mydb")

        """
        # read .csv file; keep every cell as text so that values such as
        # "00123456" or "NA" reach the String columns unchanged
        df = pd.read_csv(file_name, dtype=str, keep_default_na=False)
        cols = [f"col_{name}" for name in df.columns]
        if len(cols) < 2:
            raise ValueError("subtask_C requires a .csv file with at least 2 columns")

        # initialize Base
        Base = declarative_base()

        # Build the mapped class dynamically with type() instead of exec():
        # the column names come from the .csv header and must never be
        # executed as code.
        attrs = {
            '__tablename__': 'CustomTable',
            '__table_args__': {'sqlite_autoincrement': True},
            'id': Column(Integer, primary_key=True, nullable=False),
        }
        attrs.update({name: Column(String) for name in cols})
        CustomTable = type('CustomTable', (Base,), attrs)

        # the 2nd column of the file, whatever its header is
        second_column = getattr(CustomTable, cols[1])

        # create the database
        engine = create_engine('sqlite:///{}.db'.format(db_name))
        Base.metadata.create_all(engine)

        # create and run the session
        s = sessionmaker(bind=engine)()

        try:
            # add records
            s.add_all([CustomTable(**dict(zip(cols, row)))
                       for row in df.itertuples(index=False, name=None)])
            # make sure the pending rows are in the database before the bulk delete
            s.flush()
            # delete records where in 2nd column first element is a number
            s.query(CustomTable).filter(second_column.op('GLOB')('[0-9]*')).delete(synchronize_session=False)
            s.commit()
        except BaseException:
            # undo the partial work, then let the caller see the error
            s.rollback()
            raise
        finally:
            s.close()
            # release the connection pool (and the file handle) of the engine
            engine.dispose()

    @staticmethod
    def subtask_D(file_name, db_name):
        """
        Reads .csv file and saves it in an MongoDB database.
        Deletes records where in 3rd column first element is a letter.

        The documents are stored in the collection ``CustomCollection``.
        The deletion is applied to the whole collection, i.e. also to
        matching documents that were stored by earlier calls.

        Parameters
        ----------
        file_name: str
            The name of a file for reading with file extension.
        db_name: str
            The name of a database which will be created without extension.

        Returns
        -------
        out : None

        Example
        --------
        >>> subtask_D("my_file.csv", "mydb")

        """
        # read .csv file; keep every cell as text because the $regex filter
        # below only matches string values
        df = pd.read_csv(file_name, dtype=str, keep_default_na=False)
        data = df.to_dict('records')

        # create a local connection
        # requires mongoDB to be installed on a system
        myclient = MongoClient('localhost', 27017)

        try:
            # create db and collection if not exists
            mycol = myclient[db_name]["CustomCollection"]

            # insert records in db (insert_many rejects an empty list)
            if data:
                mycol.insert_many(data)

            # delete documents where in 3rd column first element is a letter;
            # without a 3rd column there is nothing to delete
            if len(df.columns) >= 3:
                third_column = str(df.columns[2])
                mycol.delete_many({third_column: {"$regex": "^[a-zA-Z]"}})
        finally:
            # close connection even if one of the operations failed
            myclient.close()

In [126]:
# task1 = Task1()
# task1.subtask_A("my_file1")
# time.sleep(2)
# task1.subtask_B("my_file1.csv", "my_file2.csv")
# task1.subtask_C("my_file1.csv", "mysqldb")
# task1.subtask_D("my_file1.csv", "mymongodb")

# Task 2

In [124]:
class Task2:
    """
    Class for storing Task2 functions.

    subtask_A downloads an image and stores it base64-encoded,
    subtask_B turns such a file back into a .jpg image.

    """
    def __init__(self):
        print("Successfuly initialized task 2!")
        print("Example of usage:\n\
            task2 = Task2()\n\
            task2.subtask_A('https://upload.wikimedia.org/wikipedia/commons/3/31/Wandeling_over_het_Hulshorsterzand-Hulshorsterheide_07-03-2020._%28d.j.b%29_20.jpg')\n\
            task2.subtask_B('21c86e72c1dd494de4b65d5fdb2ca5e5')")

    @staticmethod
    def subtask_A(image_url):
        """
        Saves .jpg image from provided url, encodes it using base64 and saves as a binary file.
        The name of the file is the md5 hex digest of the base64-encoded data.

        The downloaded image is kept as 'original_image.jpg' and the encoded
        file is written next to it, both in the current working directory.

        Parameters
        ----------
        image_url: str
            Link to an image.

        Returns
        -------
        out : str
            Name of the created base64 file. It is also printed and can be
            passed to subtask_B.

        Example
        --------
        >>> subtask_A('https://upload.wikimedia.org/wikipedia/commons/3/31/Wandeling_over_het_Hulshorsterzand-Hulshorsterheide_07-03-2020._%28d.j.b%29_20.jpg')

        """
        # download original image
        urllib.request.urlretrieve(image_url, 'original_image.jpg')

        with open('original_image.jpg', "rb") as f:
            # encode with base64
            data = base64.b64encode(f.read())

        # the md5 digest is only used to derive a file name, not for security
        filename = hashlib.md5(data).hexdigest()

        # create file with name calculated using md5
        with open(filename, "wb") as f:
            f.write(data)

        print(filename)
        return filename

    @staticmethod
    def subtask_B(file_name):
        """
        Restores image file from subtask_A to the .jpg format.

        The result is written to 'retreived_image.jpg' in the current working
        directory. Data that already is a JPEG is written back unchanged;
        any other image format is converted to JPEG.

        Parameters
        ----------
        file_name: str
            Name of file to restore.

        Returns
        -------
        out : None

        Example
        --------
        >>> subtask_B('21c86e72c1dd494de4b65d5fdb2ca5e5')

        """
        with open(file_name, "rb") as f:
            # decode with base64
            raw = base64.b64decode(f.read())

        # Image.open identifies the data and raises if it is not an image
        with Image.open(BytesIO(raw)) as im:
            if im.format == 'JPEG':
                # Writing the decoded bytes restores the original file exactly;
                # saving through PIL would compress the JPEG a second time.
                with open('retreived_image.jpg', "wb") as out:
                    out.write(raw)
            else:
                # JPEG has no alpha channel or palette, so convert to RGB first
                im.convert('RGB').save('retreived_image.jpg', 'JPEG')
               

In [127]:
# task2 = Task2()
# task2.subtask_A('https://upload.wikimedia.org/wikipedia/commons/3/31/Wandeling_over_het_Hulshorsterzand-Hulshorsterheide_07-03-2020._%28d.j.b%29_20.jpg')
# task2.subtask_B('21c86e72c1dd494de4b65d5fdb2ca5e5')

# Task 3

In [236]:
class Task3:
    """
    Class for storing Task3 functions.

    """
    def __init__(self):
        print("Successfuly initialized task 3!")
        print("Example of usage:\n\
            task3 = Task3()\n\
            await task3.subtask('task3.txt')")

    @staticmethod
    async def subtask(file_name):
        """
        Pings all devices provided in the form "name: IP-address" from a text file.
        All hosts are pinged concurrently, one echo request each.
        Saves results in the file ping_results.csv: the two columns of the
        input file plus a third column with the output of the ping command
        (or with "error: ..." if the ping command could not be started).

        Note: the task mentioned the form "name: MAC-address", which requires
        obtaining IP-addresses from MAC-addresses. Using scapy for this purpose
        seems to require root privileges, which the author preferred not to
        grant, so the form "name: IP-address" is used instead.

        Parameters
        ----------
        file_name: str
            Name of the file with one "name: IP-address" entry per line.

        Returns
        -------
        out : None

        Example
        --------
        >>> await subtask('task3.txt')

        """
        # read file using pandas; everything is kept as text
        data = pd.read_csv(file_name, sep=":", header=None, dtype=str)
        # "name: address" leaves a blank in front of the address
        hosts = data[1].str.strip().tolist()

        async def ping(host):
            # Start ping without a shell so that the content of the file can
            # never be interpreted as shell syntax; "--" stops option parsing.
            try:
                ping_process = await asyncio.create_subprocess_exec(
                    "ping", "-c", "1", "--", host,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE)
            except OSError as exc:
                # e.g. ping is not installed: record it instead of failing all hosts
                return f"error: {exc}"
            stdout, _ = await ping_process.communicate()
            # store readable text in the .csv rather than the repr of bytes
            return stdout.decode(errors="replace")

        # gather returns the results in the order of the hosts
        results = await asyncio.gather(*(ping(host) for host in hosts))
        data[2] = results
        data.to_csv('ping_results.csv', index=False)

In [242]:
# task3 = Task3()
# await task3.subtask('task3.txt')