# Analytics server

In [1]:
from os import listdir
# Data to serve with our API
datasets = listdir("data")
datasets

['cars.csv', 'united_nations.csv']

In [2]:
import json
json.dumps(datasets)

'["cars.csv", "united_nations.csv"]'

## Working With Files in Python
https://realpython.com/working-with-files-in-python/

In [9]:
entries = os.listdir('data/')
for entry in entries:
    print(entry)

cars.csv
data.csv


In [11]:
import os

with os.scandir('data/') as entries:
    for entry in entries:
        print(entry.name)

cars.csv
data.csv


In [61]:
import os

with os.scandir("data") as dir_entries:
    for entry in dir_entries:
        info = entry.stat()
        print(info)

os.stat_result(st_mode=33206, st_ino=0, st_dev=0, st_nlink=0, st_uid=0, st_gid=0, st_size=14937, st_atime=1590573945, st_mtime=1530305963, st_ctime=1590573945)
os.stat_result(st_mode=33206, st_ino=0, st_dev=0, st_nlink=0, st_uid=0, st_gid=0, st_size=25830, st_atime=1590573945, st_mtime=1408008491, st_ctime=1590573945)


Same results with *pathlib*

In [13]:
from pathlib import Path
current_dir = Path('data')
for path in current_dir.iterdir():
    info = path.stat()
    print(info.st_mtime)

1530305963.4392028
1408008491.0


In [16]:
from datetime import datetime
from os import scandir

def convert_date(timestamp):
    d = datetime.utcfromtimestamp(timestamp)
    formated_date = d.strftime('%d %b %Y')
    return formated_date

def get_files():
    dir_entries = scandir('data/')
    for entry in dir_entries:
        if entry.is_file():
            info = entry.stat()
            print(f'{entry.name}\t Last Modified: {convert_date(info.st_mtime)}')

In [17]:
 get_files()

cars.csv	 Last Modified: 29 Jun 2018
data.csv	 Last Modified: 14 Aug 2014


## Retrieve file properties
https://www.w3resource.com/python-exercises/python-basic-exercise-107.php

In [19]:
import os.path
import time

print('File         :', "data/cars.csv")
print('Access time  :', time.ctime(os.path.getatime("data/cars.csv")))
print('Modified time:', time.ctime(os.path.getmtime("data/cars.csv")))
print('Change time  :', time.ctime(os.path.getctime("data/cars.csv")))
print('Size         :', os.path.getsize("data/cars.csv"))

File         : data/cars.csv
Access time  : Wed May 27 12:05:45 2020
Modified time: Fri Jun 29 22:59:23 2018
Change time  : Wed May 27 12:05:45 2020
Size         : 14937


In [75]:
from hurry.filesize import size

def file_info():
    dir_entries = scandir('data/')
    for entry in dir_entries:
        name = entry.name
        lastModified = entry.stat().st_mtime
        filesize = size(entry.stat().st_size)
        print(name)
        print(lastModified)
        print(filesize)

In [76]:
file_info()

cars.csv
1530305963.4392028
14K
data.csv
1408008491.0
25K


In [63]:
dir_entries = scandir('data/')
for entry in dir_entries:
    if entry.is_file():
        info = entry.stat()
        print(info)

os.stat_result(st_mode=33206, st_ino=0, st_dev=0, st_nlink=0, st_uid=0, st_gid=0, st_size=14937, st_atime=1590573945, st_mtime=1530305963, st_ctime=1590573945)
os.stat_result(st_mode=33206, st_ino=0, st_dev=0, st_nlink=0, st_uid=0, st_gid=0, st_size=25830, st_atime=1590573945, st_mtime=1408008491, st_ctime=1590573945)


In [97]:
 import pandas as pd

 df = pd.DataFrame(columns= ['name','lastModified','filesize'])

 df

Unnamed: 0,name,lastModified,filesize


In [106]:
dir_entries = scandir('data/')
for entry in dir_entries:
     df = df.append({'name' : entry.name, 'lastModified' : entry.stat().st_mtime, 'filesize' : size(entry.stat().st_size)}, ignore_index=True)    

# df.to_json("dataset.json",orient='records')
df.to_json(orient='records')

'[{"name":"cars.csv","lastModified":1530305963.4392027855,"filesize":"14K"},{"name":"united_nations.csv","lastModified":1408008491.0,"filesize":"25K"},{"name":"cars.csv","lastModified":1530305963.4392027855,"filesize":"14K"},{"name":"united_nations.csv","lastModified":1408008491.0,"filesize":"25K"}]'

In [99]:
df

Unnamed: 0,name,lastModified,filesize
0,cars.csv,1530306000.0,14K
1,united_nations.csv,1408008000.0,25K


In [78]:
df = pd.DataFrame(columns=['A'])
for i in range(5):
    df = df.append({'A': i}, ignore_index=True)

df

Unnamed: 0,A
0,0
1,1
2,2
3,3
4,4


Append DataFrame
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.append.html

## Better way to create json file from multiple lists?
https://stackoverflow.com/questions/50519818/better-way-to-create-json-file-from-multiple-lists/50520116

In [33]:
import json

devices = ['iphone', 'ipad', 'ipod', 'watch']
cities = ['NY', 'SFO', 'LA', 'NJ']
companies = ['Apple', 'Samsung', 'Walmart']

lists = ['devices', 'cities', 'companies']

data = {listname: globals()[listname] for listname in lists}
with open('abc.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)


In [38]:
import pandas as pd

devices = ['iphone', 'ipad', 'ipod', 'watch']
cities = ['NY', 'SFO', 'LA', 'NJ']
companies = ['Apple', 'Samsung', 'Walmart']

df = pd.DataFrame(list(zip(devices,cities,companies)),columns =['devices', 'cities','companies'])
df

Unnamed: 0,devices,cities,companies
0,iphone,NY,Apple
1,ipad,SFO,Samsung
2,ipod,LA,Walmart


In [40]:
df.to_json("df.json",orient='records')

## Reading JSON from a File
https://stackabuse.com/reading-and-writing-json-to-a-file-in-python/

In [104]:
import json
from datetime import datetime

def get_timestamp():
    return datetime.now().strftime(("%Y-%m-%d %H:%M:%S"))


data = {}
data['people'] = []
data['people'].append({
    "fname": "Doug",
    "lname": "Farrell",
    "timestamp": get_timestamp()
})
data['people'].append({
    "fname": "Kent",
    "lname": "Brockman",
    "timestamp": get_timestamp()
})
data['people'].append({
    "fname": "Bunny",
    "lname": "Easter",
    "timestamp": get_timestamp()
})

with open('people.txt', 'w') as outfile:
    json.dump(data, outfile)

In [105]:

with open('people.txt') as json_file:
    data = json.load(json_file)
    for p in data['people']:
        print('Name: ' + p['fname'])
        print('Website: ' + p['lname'])
        print('From: ' + p['timestamp'])
        print('')

Name: Doug
Website: Farrell
From: 2020-05-27 15:31:12

Name: Kent
Website: Brockman
From: 2020-05-27 15:31:12

Name: Bunny
Website: Easter
From: 2020-05-27 15:31:12



In [108]:
import dataset

data = dataset.read()

In [110]:
data

'[{"name":"cars.csv","lastModified":1530305963.4392027855,"filesize":"14K"},{"name":"united_nations.csv","lastModified":1408008491.0,"filesize":"25K"}]'

In [116]:
json.loads(data)

[{'name': 'cars.csv', 'lastModified': 1530305963.4392028, 'filesize': '14K'},
 {'name': 'united_nations.csv',
  'lastModified': 1408008491.0,
  'filesize': '25K'}]

## Request analytics server

In [1]:
import requests as rq

In [2]:
res = rq.get("http://127.0.0.1:8000/api/dataset")
res.json()

[{'name': 'cars.csv', 'lastModified': 1609092360.4322233, 'filesize': '14K'},
 {'name': 'united_nations.csv',
  'lastModified': 1609092360.4352262,
  'filesize': '25K'}]

In [41]:
# files = {'data_classification.csv': open('data_classification.csv', 'rb')}

In [45]:
files = {'file': ('data_classification.csv', open('data_classification.csv', 'rb'))}

In [47]:
r = rq.post("http://127.0.0.1:8000/api/dataset", files=files)
r.content

b'{"filename":"data_classification.csv","content_type":""}'

Example of post request:
https://requests.readthedocs.io/en/v0.8.2/user/quickstart/#post-a-multipart-encoded-file

In [6]:
url = 'http://httpbin.org/post'
files = {'data_classification.csv': open('data_classification.csv', 'r')}
r = rq.post(url, files=files)
r.json()

{'args': {},
 'data': '',
 'files': {'data_classification.csv': '4.8550642421469092,9.6399615658447146,1\n8.6254397593438625,0.058926530182361603,0\n3.8281915383413434,0.72319923434406519,0\n7.1509548369795084,3.899420415982604,1\n6.4779004408046603,8.1981805479153991,1\n1.9222695007920265,1.3314272649586201,0\n8.9782158890739083,0.99343751091510057,1\n6.6356030758470297,8.5428026784211397,1\n7.6723589515313506,5.4163997946307063,1\n4.8660153336822987,2.0426712930202484,0\n6.8614049674943089,9.655309715308249,1\n8.5404213238507509,2.5903742294758558,1\n3.7178806541487575,5.3816621145233512,0\n9.1812971234321594,0.1714746467769146,1\n9.5601400220766664,0.02494648564606905,0\n5.9713694732636213,4.1883018705993891,1\n9.4382026931270957,1.9438124401494861,1\n4.3575510196387768,9.8879833146929741,1\n4.5403319643810391,6.7138733575120568,1\n1.5491016302257776,9.3751321639865637,0\n8.0819737119600177,9.8422068124637008,1\n9.6204650029540062,2.0993275381624699,1\n8.8347709784284234,3.152204123

In [7]:
f = open('data_classification.csv', 'r')

In [12]:
f.seek(0,os.SEEK_END)
fsize = f.tell()
fsize

4104

In [13]:
from hurry.filesize import size
size(fsize)

'4K'

In [49]:
files = {'file': ('data_classification.csv', open('data_classification.csv', 'rb'))}
r = rq.post("http://127.0.0.1:8000/api/dataset", files=files)
r.content

b'{"filename":"data_classification.csv","content_type":""}'

## Get insights into the datasets

In [40]:
import json
dataset = "cars.csv"
r = rq.get("http://127.0.0.1:8000/api/dataset/"+dataset)
cars = json.loads(r.json())
cars[0]

{'mpg': 18.0,
 'cylinders': 8,
 'cubicinches': 307,
 'horsepower': 130,
 'weightlbs': 3504,
 'time': 12,
 'year': 1971,
 'brand': 'chevrolet',
 'origin': 'US'}