# `Random data generator with pandas`
___
An outline for a random data generator that builds its output with a pandas DataFrame and saves it as a CSV file.

In [1]:
import numpy as np
from numpy import random
import pandas as pd
import random

# Add logging modules
import logging
import os 
from pathlib import Path

# Configure the root logger: records at INFO level and above are written to
# 'rdg.log', which is opened with filemode='w' (truncated on every run).
# The %(...)s placeholders are LogRecord attributes
# (see docs.python.org, "LogRecord attributes").
logging.basicConfig(
    filename='rdg.log',
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s - %(filename)s - %(name)s - %(levelname)s - %(message)s',
)

# Example calls, one per severity level (kept commented out):
# logging.debug('This is a debug Message')
# logging.info('This is an info Message')
# logging.warning('This is a warning Message')
# logging.error('This is an error Message')
# logging.critical('This is a critical Message')

In [None]:
# Number of rows in the demo data set. Every series built in this cell uses
# this single value so that their lengths cannot drift apart.
N_ROWS = 100

# create raw series: consecutive IDs 1..N_ROWS
ID_series_array = np.arange(1, N_ROWS + 1, 1)

# create prices series: uniform draws from [0.99, 24.99).
# Values are left unrounded here; round them when building the dataframe.
prices_series_array = np.random.uniform(0.99, 24.99, N_ROWS)

# Word pools; one word from each pool is combined into a product name.
d1 = ["Juicy", "Round", "Curly", "Green", "Blue", "Flat", "Expensive"]
d2 = ["french", "nifty", "smelly", "invisible", "charged", "alive", "shiny"]
d3 = ["bread", "cheese", "honey", "fruit", "dish washing liquid", "batteries", "cutlery"]

# Product names have the form "<d1>, <d2> <d3>", e.g. "Juicy, french bread".
products_list = [
    f"{random.choice(d1)}, {random.choice(d2)} {random.choice(d3)}"
    for _ in range(N_ROWS)
]

products_series_array = np.array(products_list)

In [None]:
# Assemble the column data; prices are rounded to two decimals at this point.
data = {
    'ID': ID_series_array,
    'Product': products_series_array,
    'Price ($)': np.round(prices_series_array, 2),
}

# Build the frame with an explicit column order and promote 'ID' to the index
# in the same expression.
pd_data = pd.DataFrame(
    data,
    columns=['ID', 'Product', 'Price ($)'],
).set_index('ID')

# Preview the first rows.
pd_data.head()

In [None]:
# Write the frame to disk; index=True keeps the 'ID' index as a CSV column.
pd_data.to_csv(path_or_buf='random_product_data.csv', index=True)

### `Write as program`
___

In [2]:
def random_data_generator_n(size: int, filename: str, seed: "int | None" = None) -> None:
    """Generate random product rows and write them to a CSV file.

    The CSV contains three columns:

    * ``ID`` - the index, consecutive integers from 1 to ``size``.
    * ``Product`` - a name of the form ``"<d1>, <d2> <d3>"`` where each part
      is drawn at random from its own word list.
    * ``Price ($)`` - a uniform draw from [0.99, 24.99), rounded to 2 decimals.

    Random values come from a generator local to this call, so the global
    ``np.random`` / ``random`` state is neither read nor modified. Use
    ``seed`` to obtain reproducible output.

    Args:
        size (int): number of rows to generate. ``0`` writes a header-only file.
        filename (str): destination, passed unchanged to ``DataFrame.to_csv``.
        seed (int, optional): seed for the random generator. ``None`` (the
            default) gives different data on every call.

    Raises:
        TypeError: if ``size`` is not an integer.
        ValueError: if ``size`` is negative.
    """
    # Validate up front so callers get a clear message instead of an error
    # raised from deep inside numpy.
    if not isinstance(size, (int, np.integer)):
        raise TypeError(f'size must be an integer, got {type(size).__name__}')
    if size < 0:
        raise ValueError(f'size must be non-negative, got {size}')

    data_n = int(size)
    logging.info(' Dataset size %s', size)

    rng = np.random.default_rng(seed)

    ids = np.arange(1, 1 + data_n)
    prices = np.round(rng.uniform(0.99, 24.99, data_n), 2)

    dx = {
        'd1': ["Juicy", "Round", "Curly", "Green", "Blue", "Flat", "Expensive"],
        'd2': ["french", "nifty", "smelly", "invisible", "charged", "alive", "shiny"],
        'd3': ["bread", "cheese", "honey", "fruit", "dish washing liquid", "batteries", "cutlery"],
    }

    # Draw all words for each name part in one vectorised call, then zip the
    # three columns of words together into product names.
    word_columns = [rng.choice(dx[key], size=data_n) for key in ('d1', 'd2', 'd3')]
    products_list = [
        f"{first}, {second} {third}"
        for first, second, third in zip(*word_columns)
    ]

    pd_data = pd.DataFrame(
        {'Product': products_list, 'Price ($)': prices},
        index=pd.Index(ids, name='ID'),
    )

    pd_data.to_csv(filename, index=True)
    logging.info('File was saved as %s', filename)
    # To inspect duplicated rows while debugging: pd_data[pd_data.duplicated()]

# add error handling (try/except)

In [9]:
if __name__ == "__main__":
    try:
        random_data_generator_n(500, 'new_product_data.csv')
    except TypeError:
        # Keep the user-facing hint, but also record the traceback in the log
        # so the offending argument can be identified afterwards.
        logging.exception('Invalid argument type passed to random_data_generator_n')
        print('First value must be an integer, second value a \'string\'')
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt and SystemExit are not swallowed; the traceback is
        # logged rather than silently discarded.
        logging.exception('Unexpected error while generating data')
        print('Ups, something went wrong!')
    else:
        print('Data was created successfully')

Data was created successfully
