# AR App Data
The AR App database models must be populated from the legacy AR_Database. This notebook checks the legacy data for integrity and cleans it up before it is loaded into the Django web app database.

In [2]:
# import initializer
import kernel_init

# import packages
from texturedar.models import ARSample, Substrate, CCSData, FlameData, ImageData
import datetime as dt
from django.contrib.auth.models import User
import json
import os
import pandas as pd
import pytz as tz
import sys
from tqdm import tqdm

### AR Samples
Add new samples to the database.

In [12]:
# Path to the CSV listing the sample names to insert. The file is read with
# header=None, so its first row is treated as data and column 0 holds the names.
filepath = os.path.normpath(
    r'C:\Users\hsmith\repos\Development_Projects\python-development\transfer_ar_data\upload_local_data\missing_samples.csv'
)

# read the file and coerce every sample name to a string
samples_df = pd.read_csv(filepath, header=None)
samples = [str(s) for s in samples_df[0].values]

# Loop-invariant values: resolve the creating user once instead of issuing
# the same query for every sample.
createdBy = User.objects.get(username='lye')
storage = 0

# iterate the sample list and create one ARSample per name
for s in tqdm(samples):

    # Earlier revision read these per-row values from `cleaned_samples`;
    # fixed values (runtime=-1, sem=False) are passed to create() below instead.
    #runTime = int(cleaned_samples.iloc[i, 2])
    #sem = bool(cleaned_samples.iloc[i, 3])

    try:
        ARSample.objects.create(
            name=s,
            runtime=-1,
            sem=False,
            storage=storage,
            created_by=createdBy,
            substrate=None,
            clean_recipe=None,
            rie_recipe=None,
            tooling=None
        )
    except Exception as e:
        # Report the failing sample and stop at the first failure so the
        # problem can be inspected before the rest are inserted.
        print(f'{s} not inserted.')
        print(e)
        break



100%|██████████| 58/58 [00:07<00:00,  7.97it/s]


In [3]:
# now the AR sample timestamps need to be updated
# NOTE(review): `cleaned_samples` is not defined in any cell shown in this
# notebook; this cell relies on kernel state from elsewhere — confirm where it
# is loaded before running on a fresh kernel.
# NOTE(review): as written the loop only fetches each ARSample by name
# (column 0 of `cleaned_samples`); the timestamp update itself is commented
# out below, so nothing is written to the database.
# iterate the dataframe
for i in tqdm(range(cleaned_samples.shape[0])):
    get_sample = ARSample.objects.get(name=str(cleaned_samples.iloc[i, 0]))

    # Disabled update logic: parse column 1 as a naive timestamp, localize it
    # to UTC and store it on the sample's `created` field.
    #date_format = '%Y-%m-%d %H:%M:%S'
    #timeStamp = dt.datetime.strptime(cleaned_samples.iloc[i, 1], date_format)
    #createdDate = tz.utc.localize(timeStamp)

    #get_sample.created = createdDate
    #get_sample.save()

100%|██████████| 474/474 [00:39<00:00, 12.11it/s]


### Add Cleaned Flame/CCS Data
Add the cleaned CCS & FLAME data to the web app database.

In [17]:
# Locations of the exported CCS and FLAME views.
ccs_csv = r'C:\Users\hsmith\repos\Development_Projects\python-development\transfer_ar_data\upload_local_data\view_ccs_data.csv'
flame_csv = r'C:\Users\hsmith\repos\Development_Projects\python-development\transfer_ar_data\upload_local_data\view_flame_data.csv'

# Load both files into dataframes.
ccs_data = pd.read_csv(ccs_csv)
flame_data = pd.read_csv(flame_csv)

# Show the column labels of each frame (CCS first, then FLAME).
for frame in (ccs_data, flame_data):
    print(frame.columns)

Index(['id', 'arsample_id', 'data_file', 'measurement_id'], dtype='object')
Index(['id', 'arsample_id', 'data_file', 'measurement_id'], dtype='object')


#### CCS DATA

In [19]:
# Queryset of all samples, and the user recorded as having taken the measurements.
samples_qs = ARSample.objects.all()
measuredby = User.objects.get(username='lye')

# number of CCS rows to process
iters = ccs_data.shape[0]

# upload flag: the insert loop below only runs when this is True
UPLOAD_CCS = False

if UPLOAD_CCS:

    ## iterate the dataframe and add one CCSData record per row
    for i in tqdm(range(iters)):
        try:
            # Look the sample up inside the try so that a missing sample
            # (IndexError on [0]) is reported with its record id as well.
            sampleObj = samples_qs.filter(name=str(ccs_data['arsample_id'][i]))[0]

            # CCS records
            new_ccs = CCSData.objects.create(
                arsample=sampleObj,
                etched_surface=2,
                int_time=0.12,
                avgs=1000.0,
                measurement_id=str(ccs_data['measurement_id'][i]),
                measured_by=measuredby,
                data_file=str(ccs_data['data_file'][i])
            )
        except Exception as e:
            # Catch Exception (not a bare except) so a kernel interrupt still
            # propagates, and print the underlying error instead of hiding it.
            print(f"Record {ccs_data['id'][i]}, {ccs_data['arsample_id'][i]} failed.")
            print(e)
            break



100%|██████████| 276/276 [00:24<00:00, 11.14it/s]


#### FLAME Data

In [20]:
# Queryset of all samples, and the user recorded as having taken the measurements.
samples_qs = ARSample.objects.all()
measuredby = User.objects.get(username='lye')

# number of FLAME rows to process
iters = flame_data.shape[0]

# upload flag: the insert loop below only runs when this is True
UPLOAD_FLAME = True

if UPLOAD_FLAME:

    ## iterate the dataframe and add one FlameData record per row
    for i in tqdm(range(iters)):
        try:
            # Look the sample up inside the try so that a missing sample
            # (IndexError on [0]) is reported with its record id as well.
            sampleObj = samples_qs.filter(name=str(flame_data['arsample_id'][i]))[0]

            # Flame records
            new_flame = FlameData.objects.create(
                arsample=sampleObj,
                etched_surface=2,
                int_time=4.5,
                avgs=1000.0,
                measurement_id=str(flame_data['measurement_id'][i]),
                measured_by=measuredby,
                data_file=str(flame_data['data_file'][i]),
                nlc=True,
                edc=True
            )
        except Exception as e:
            # Catch Exception (not a bare except) so a kernel interrupt still
            # propagates, and print the underlying error instead of hiding it.
            print(f"Record {flame_data['id'][i]}, {flame_data['arsample_id'][i]} failed.")
            print(e)
            break

100%|██████████| 276/276 [00:25<00:00, 10.98it/s]


### Image Data

In [8]:
# Build the path to the cleaned image CSV beneath DATA_PATH, then load it.
img_csv = os.path.join(DATA_PATH, 'cleaned_data', 'cleaned_image_data.csv')
img_data = pd.read_csv(img_csv)

# Print a compact summary of the frame followed by its column labels.
img_data.info(verbose=False)
print(img_data.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2719 entries, 0 to 2718
Columns: 5 entries, ID to createdBy
dtypes: int64(1), object(4)
memory usage: 106.3+ KB
Index(['ID', 'Sample', 'timestamps', 'file_paths', 'createdBy'], dtype='object')


In [29]:
# Queryset of all samples, and the user recorded as having taken the images.
samples_qs = ARSample.objects.all()
measuredby = User.objects.get(username='lye')

# number of image rows to process
iters = img_data.shape[0]

# upload flag: the insert loop below only runs when this is True
UPLOAD_IMG = True

# expected layout of the 'timestamps' column (loop-invariant)
date_format = '%Y-%m-%d %H:%M:%S'

if UPLOAD_IMG:

    ## iterate the dataframe and add one ImageData record per row
    for i in tqdm(range(iters)):

        # get sample name from dataframe
        s = str(img_data['Sample'][i])

        # get sample obj
        sampleObj = samples_qs.filter(name=s)[0]

        # Parse the timestamp BEFORE inserting, so a malformed value aborts
        # the row without leaving a record that carries the wrong `created`.
        timeStamp = dt.datetime.strptime(img_data['timestamps'][i], date_format)
        createdDate = tz.utc.localize(timeStamp)

        # image record
        new_img = ImageData.objects.create(
            arsample=sampleObj,
            measured_by=measuredby,
            image_file=str(img_data['file_paths'][i]),
        )

        # Overwrite `created` in a second save.
        # NOTE(review): presumably `created` is auto-populated on insert,
        # which is why it is set after create() — confirm against the model.
        new_img.created = createdDate
        new_img.save()

100%|██████████| 2719/2719 [13:29<00:00,  3.36it/s]


In [21]:
# fix duplicate file names: load the lists of CCS and FLAME records whose
# stored file names collide
ccspath = r'C:\Users\hsmith\repos\Development_Projects\python-development\transfer_ar_data\upload_local_data\ccs_duplicate_blobs.csv'
flamepath = r'C:\Users\hsmith\repos\Development_Projects\python-development\transfer_ar_data\upload_local_data\flame_duplicate_blobs.csv'

dfc, dff = pd.read_csv(ccspath), pd.read_csv(flamepath)

# Show the column labels of each frame (CCS first, then FLAME).
for frame in (dfc, dff):
    print(frame.columns)

Index(['id', 'arsample_id', 'data_file'], dtype='object')
Index(['id', 'arsample_id', 'data_file'], dtype='object')


In [26]:
# number of duplicate CCS records to rename
iters = dfc.shape[0]

## iterate the dataframe and prefix each stored file name with its sample id
for i in tqdm(range(iters)):

    # fetch the CCSData record by primary key
    s = CCSData.objects.get(id=int(dfc['id'][i]))

    currentName = str(s.data_file)
    prefixedDir = f"ccs_data/{dfc['arsample_id'][i]}_"

    # Skip records that already carry the prefix so that re-running this
    # cell does not prepend the sample id a second time.
    if prefixedDir in currentName:
        continue

    # count=1: only the first "ccs_data/" segment is rewritten
    newFilename = currentName.replace("ccs_data/", prefixedDir, 1)
    s.data_file = newFilename
    s.save()


100%|██████████| 160/160 [00:14<00:00, 10.74it/s]


In [27]:
# number of duplicate FLAME records to rename
iters = dff.shape[0]

## iterate the dataframe and prefix each stored file name with its sample id
for i in tqdm(range(iters)):

    # fetch the FlameData record by primary key
    s = FlameData.objects.get(id=int(dff['id'][i]))

    currentName = str(s.data_file)
    prefixedDir = f"flame_data/{dff['arsample_id'][i]}_"

    # Skip records that already carry the prefix so that re-running this
    # cell does not prepend the sample id a second time.
    if prefixedDir in currentName:
        continue

    # count=1: only the first "flame_data/" segment is rewritten
    newFilename = currentName.replace("flame_data/", prefixedDir, 1)
    s.data_file = newFilename
    s.save()

100%|██████████| 160/160 [00:14<00:00, 10.75it/s]
