In [0]:
# All imports
import pandas as pd
import numpy as np

**Data Structures in "pandas"**

* A Series is a one-dimensional labeled array capable of holding any data type.

* A DataFrame is a two-dimensional labeled data structure with columns of potentially different types.



# Input/Output

In [0]:
# pandas can read many input formats (JSON, CSV, plain text, HTML, ...).
# This notebook takes its input as CSV [comma-separated values].
# read_csv also accepts various parameters, e.g. parse_dates [parses string date values].

# Base URL of the workshop repository; data file paths are appended to it.
colab_path = "https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/"

# Plain load with default options (no date parsing requested here).
ts_data = pd.read_csv(
    colab_path + "data/stock_data.csv",
)

In [0]:
# Peek at the head (first few rows) of the dataframe
ts_data.head()

In [0]:
# Let's check the datatypes of the columns in the dataframe

ts_data.info()

### A Quick look at pandas data types

![dtypes](https://github.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/blob/master/images/pandas_dtypes.png?raw=1)

In [0]:
# Descriptive summary statistics of the dataframe
ts_data.describe()

In [0]:
# Convert the 'date' column from string to datetime by overwriting the column itself.
# Assigning back to the same (lowercase) name is what changes its dtype; writing to a
# differently-cased name such as 'Date' would add a second column and leave 'date'
# as strings.
ts_data['date'] = pd.to_datetime(ts_data['date'])
# Re-check the dtypes to confirm the conversion.
ts_data.info()

In [0]:
# Load the same CSV again, this time asking read_csv to parse the 'date' column
# into datetimes and to use the first column (position 0) as the index.
df = pd.read_csv(
    colab_path + 'data/stock_data.csv',
    index_col=0,
    parse_dates=['date'],
)

In [0]:
# Display the dataframe loaded with parsed dates
df

In [0]:
# Select rows by label through the date index:
# everything from October 2008 through January 2009.
df.loc['2008 10':'2009 01']

In [0]:
# IPython help lookup (trailing `?`) for DataFrame.truncate. In short:
# truncate cuts a sorted DataFrame/Series before and/or after some
# particular index value. If the axis contains only datetime values,
# the before/after parameters are converted to datetime values.
df.truncate?

In [0]:
# Trim the frame to the index range bounded by '2008 10' and '2009'
df.truncate(before='2008 10', after='2009')

In [0]:
# Parsing a date that is spread over multiple columns:
# first, a raw look at the file with default parsing
pd.read_csv(colab_path+'data/sample_2.csv').head()

In [0]:
# Combine the first three columns (positions 0, 1, 2) into a single 'date'
# column and use it as the index.
# NOTE(review): the dict/nested form of parse_dates is deprecated in recent pandas
# releases — confirm the pandas version this notebook targets.
pd.read_csv(
    colab_path + 'data/sample_2.csv',
    parse_dates={'date': [0, 1, 2]},
    index_col='date',
).head()

## dateparser

In [0]:
# Raw look at sample.csv with default parsing
pd.read_csv(colab_path+'data/sample.csv')

In [0]:
# Ask read_csv to parse column 'x' as dates, then inspect the resulting dtypes.
pd.read_csv(
    colab_path + 'data/sample.csv',
    parse_dates=['x'],
).info()

In [0]:
from datetime import datetime

In [0]:
# Parse a single timestamp by hand, giving strptime the exact layout:
# dash-separated date fields, a literal 'T', then dash-separated time fields
datetime.strptime('2018-11-01T12-12-00', '%Y-%m-%dT%H-%M-%S')

In [0]:
# Custom parser that spells out the timestamp layout explicitly.
# The available format specifiers are listed at
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
def dateparse(x):
    """Convert a string such as '2018-11-01T12-12-00' into a ``datetime``.

    Parameters
    ----------
    x : str
        Timestamp text in the ``%Y-%m-%dT%H-%M-%S`` layout.

    Returns
    -------
    datetime.datetime
        The parsed timestamp.

    Raises
    ------
    ValueError
        Propagated from ``datetime.strptime`` when ``x`` does not match the layout.
    """
    layout = '%Y-%m-%dT%H-%M-%S'
    return datetime.strptime(x, layout)

In [0]:
#pd.Timestamp('2018-11-01T0-10-1')

In [0]:
# Try the parser on a value whose time fields are not zero-padded
dateparse('2018-11-01T0-10-1')

In [0]:
# Parse column 'x' with the custom dateparse function.
# colab_path already ends with '/', so the relative path must not start with one
# (a leading '/' would produce '...master//data/sample.csv').
# NOTE(review): date_parser is deprecated in newer pandas releases in favour of
# date_format — confirm the targeted pandas version before migrating.
pd.read_csv(
    colab_path + 'data/sample.csv',
    parse_dates=['x'],
    date_parser=dateparse,
)

## Write data

In [0]:
# Build a DatetimeIndex running from 2018-12-17 to 2018-12-21 (both ends included)
# with one entry per second. The lowercase 's' alias is used because the uppercase
# 'S' spelling is deprecated in recent pandas releases.
date_index = pd.date_range(start='20181217', end='20181221', freq='s')

In [0]:
# Number of timestamps in the generated index
len(date_index)

In [0]:
# Generated Index
date_index

In [0]:
# Build a dataframe of random integers in [0, 100), one row per timestamp in date_index.
# A seeded, local generator keeps the data identical on every Restart & Run All
# without touching NumPy's global random state.
df = pd.DataFrame(
    data=np.random.default_rng(42).integers(0, 100, len(date_index)),
    index=date_index,
)

In [0]:
# Changing the name of the column
df.columns = ['Value']

In [0]:
#df.head()

In [0]:
# Exporting the data to a CSV file (relative path, so it lands in the working directory)
df.to_csv('test_data.csv')

# Exercise

In [0]:
# Q1:
# 1. Read "../data/exercise_sample.csv"
# 2. Change the first column's datatype to datetime
# 3. Make the first column the index
# 4. Reverse the index order
# 5. Export it to cleaned_sample.csv