##### Author: Praveen Saxena
##### Email: saxep01@gmail.com
##### Create Date: 7/24/2021
##### Purpose: Analyze the table _**event\_table**_ in database _**wang159\_myrmekes**_.

--------------------

# Input

In [1]:
database = 'wang159_myrmekes'
table = 'event_table'

In [2]:
db_table = "%s.%s" % (database, table)
print(db_table)

wang159_myrmekes.event_table


--------------------

# Preliminaries

In [3]:
%%capture 

import pandas as pd
from pprint import pprint
from IPython.display import display, Markdown
import os

from nanoHUB.application import Application

application = Application.get_instance()
engine = application.new_db_engine(database)

--------------------

# Table Information

## Table Indexes

In [5]:
sql = '''
SHOW INDEX FROM %s; 
'''

df = pd.read_sql(sql % db_table, engine)
display(df)

Unnamed: 0,Table,Non_unique,Key_name,Seq_in_index,Column_name,Collation,Cardinality,Sub_part,Packed,Null,Index_type,Comment,Index_comment


## Table Columns

In [6]:
sql = '''
SHOW FULL COLUMNS FROM %s;
'''
df = pd.read_sql(sql % db_table, engine)
display(df.head())

Unnamed: 0,Field,Type,Collation,Null,Key,Default,Extra,Privileges,Comment
0,start_datetime,datetime,,YES,,,,select,
1,end_datetime,datetime,,YES,,,,select,
2,timeline_domain,varchar(100),latin1_swedish_ci,YES,,,,select,
3,timeline_label,varchar(100),latin1_swedish_ci,YES,,,,select,
4,id_nanohub,varchar(100),latin1_swedish_ci,YES,,,,select,


--------------------

# Data Information

## Data Query

In [7]:
sql = '''
SELECT *
    FROM %s
LIMIT 100;
'''
df = pd.read_sql(sql % db_table, engine)

## Data Info

In [8]:
display(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   start_datetime   100 non-null    datetime64[ns]
 1   end_datetime     100 non-null    datetime64[ns]
 2   timeline_domain  100 non-null    object        
 3   timeline_label   100 non-null    object        
 4   id_nanohub       100 non-null    object        
 5   id_email         100 non-null    object        
 6   id_hubspot       100 non-null    object        
dtypes: datetime64[ns](2), object(5)
memory usage: 5.6+ KB


None

## Data Values

In [9]:
display(df.head())
display(df.tail())

Unnamed: 0,start_datetime,end_datetime,timeline_domain,timeline_label,id_nanohub,id_email,id_hubspot
0,2015-04-07 11:54:42,2015-04-07 11:59:48,resources,-,102608,ibukhari@purdue.edu,
1,2015-04-07 11:54:15,2015-04-07 12:00:08,home,-,102608,ibukhari@purdue.edu,
2,2015-04-07 16:04:42,2015-04-07 16:04:42,login,login,102608,ibukhari@purdue.edu,
3,2015-04-07 16:04:28,2015-04-07 16:45:57,home,-,102608,ibukhari@purdue.edu,
4,2019-08-17 06:13:15,2019-08-17 06:19:50,members,user account page,258054,rabindraningthemcha@gmail.com,


Unnamed: 0,start_datetime,end_datetime,timeline_domain,timeline_label,id_nanohub,id_email,id_hubspot
95,2016-03-22 17:02:09,2016-03-22 17:10:09,resources,2005 Molecular Conduction and Sensors Workshop,105570,anjankumar.nitw@gmail.com,
96,2016-03-22 17:03:17,2016-03-22 17:10:45,resources,edu_semi,105570,anjankumar.nitw@gmail.com,
97,2016-03-22 17:03:39,2016-03-22 17:11:02,resources,Nanotechnology 501 Lecture Series,105570,anjankumar.nitw@gmail.com,
98,2016-03-22 17:04:42,2016-03-22 17:09:57,tools,[abacus] Assembly of Basic Appliances for Coor...,105570,anjankumar.nitw@gmail.com,
99,2016-11-21 11:31:27,2016-11-21 11:31:27,login,login,128214,ge24@purdue.edu,


## Timeline Domains & Labels

In [10]:
timeline_domains_df = df.groupby('timeline_domain')['timeline_label'].unique().reset_index()
pd.options.display.max_colwidth = 5000
display('Timeline Domains', timeline_domains_df['timeline_domain'].unique())
display(timeline_domains_df)
pd.reset_option('max_colwidth')

'Timeline Domains'

array(['answers', 'courses', 'groups', 'home', 'kb', 'login', 'members',
       'projects', 'publications', 'resources', 'search', 'support',
       'tools', 'topics'], dtype=object)

Unnamed: 0,timeline_domain,timeline_label
0,answers,[-]
1,courses,"[nanoHUB-U: Fundamentals of Nanoelectronics - Part A: Basic Concepts, 2nd Edition, nanoHUB-U: Thermoelectricity: From Atoms to Systems, ECE 659: Quantum Transport, ECE 595: Fundamentals of Nanoelectronics]"
2,groups,"[Mark Lundstrom Research Group Page, nanoHUB-U, -, Education, Quantum Dot Solar Cells (QDSC) First-Year Engineering Project, Photovoltaics HUB, NEEDS: New Era Electronic Devices and Systems]"
3,home,[-]
4,kb,[Knowledge Base]
5,login,[login]
6,members,"[user account page, member #232839, register]"
7,projects,[-]
8,publications,[TAG Solar Cell Model]
9,resources,"[-, courses, ECE 453 Lecture 30 Quantum Capacitance, index.php, Illinois PHYS466 2013 Atomic Scale Simulations, prismspfmisfit, Exams for Semiconductor Device Fundamentals, ECE 606 Principles of Semiconductor Devices, Quick Review of Semiconductor Fundamentals, A Primer on Semiconductor Fundamentals, TAG Solar Cell Model p i n thin film 1 0 0, tedev, NEEDS UC Berkeley Workshop, The Berkeley Model Development Environment A MATLAB based Platform for Modeling and Analyzing Nanosc, NEEDS Seminar Series, The Role of Graphene in Semiconductor Technologies, ECE 656 Electronic Transport in Semiconductors Fall 2011 , Solar Cell Fundamentals, Solar Cells Operation and Modeling, NCN Nanophotonics Tutorials, semidop, Uniform versus delta doping in 1D heterostructures an Exercise, ECE 606 Solid State Devices, Illinois ECE 440 Solid State Electronic Devices, 2005 Molecular Conduction and Sensors Workshop, edu_semi, Nanotechnology 501 Lecture Series]"


## First & Last Points by DateTime

In [11]:
sorted_start_datetime_df = df.sort_values(by='start_datetime')

In [12]:
first = sorted_start_datetime_df.iloc[0]
first = pd.Series(first, name='First Data Point').to_frame()
display(first)

last = sorted_start_datetime_df.iloc[-1]
last = pd.Series(last, name='Last Data Point').to_frame()
display(last)


Unnamed: 0,First Data Point
start_datetime,2009-04-12 15:06:59
end_datetime,2009-04-12 15:06:59
timeline_domain,login
timeline_label,login
id_nanohub,13950
id_email,mluisier@purdue.edu
id_hubspot,


Unnamed: 0,Last Data Point
start_datetime,2019-11-15 12:59:43
end_datetime,2019-11-15 13:04:46
timeline_domain,groups
timeline_label,nanoHUB-U
id_nanohub,73743
id_email,ekayser@purdue.edu
id_hubspot,
