# Basics: [DataFrames](https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python#gs.Ulu69Pg)

In [1]:
from __future__ import absolute_import, division, print_function

import os

import numpy as np
import pandas as pd

## Creation

In [2]:
# Three ways to construct a DataFrame; each result is printed under its own label.

# 1) From a 2-D NumPy array.
arr = np.array([[1, 2, 3], [4, 5, 6]])
print("From a numpy array:", pd.DataFrame(arr), sep="\n")

# 2) From a dictionary: each key becomes a column label and each list holds
#    that column's values.
dic = {1: ['1', '3'], 2: ['1', '2']}
print("From a dictionary:", pd.DataFrame(dic), sep="\n")

# 3) From an existing DataFrame: `df` is built with an explicit index and a
#    single column 'A', then passed back through the DataFrame constructor.
df = pd.DataFrame(data=[4, 5, 6, 7], index=range(0, 4), columns=['A'])
print("From a df:", pd.DataFrame(df), sep="\n")

From a numpy array:
   0  1  2
0  1  2  3
1  4  5  6
Frum a dictionary:
   1  2
0  1  1
1  3  2
From a df:
   A
0  4
1  5
2  6
3  7


In [3]:
# Print the documentation of DataFrame.drop using NumPy's info helper.
np.info(df.drop)

 drop(labels, axis=0, level=None, inplace=False, errors='raise')

Return new object with labels in requested axis removed.

Parameters
----------
labels : single label or list-like
axis : int or axis name
level : int or level name, default None
    For MultiIndex
inplace : bool, default False
    If True, do operation inplace and return None.
errors : {'ignore', 'raise'}, default 'raise'
    If 'ignore', suppress error and existing labels are dropped.

    .. versionadded:: 0.16.1

Returns
-------
dropped : type of caller


## Inspection

In [16]:
# Build a small 2x3 frame, then report its (rows, columns) shape and its row count.
values = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(values)
n_rows = len(df.index)
print("Shape:", df.shape)
print("Height:", n_rows, end="\n\n")

Shape: (2, 3)
Height: 2



## Indexing

In [34]:
# Define example df: a 3x3 frame holding 1..9 with columns 'A', 'B', 'C'.
arr = np.arange(1, 10).reshape(3, 3)
df = pd.DataFrame(arr, columns=['A', 'B', 'C'])
print("The examples below all access the top-left '1' in . . . \n", df.head(), "\n")

# iloc (by position) and loc (by label) are the 2 main ones.
# Row and column are passed in one indexer instead of being chained
# (`df.iloc[0][0]`), which would first pull out the whole row and then index it.
df.iloc[0, 0]
df.loc[0, 'A']

# at / iat look up a single value by label / by position.
df.at[0, 'A']
# `df.get_value(0, 'A')` used to be another spelling of the lookup above; it was
# deprecated in pandas 0.21 and removed in 1.0, so it is no longer called here.
# The last expression is what the cell displays: 1.
df.iat[0, 0]

The examples below all access the top-left '1' in . . . 
    A  B  C
0  1  2  3
1  4  5  6
2  7  8  9 



1

## Expanding/Removing/Changing Data from Rows/Columns

#### Adding a New Column

In [8]:
# Draw one random integer in [0, 10) for every row of `df`.
random_values = pd.Series(np.random.randint(10, size=len(df))).values
# assign() returns a new frame carrying the extra 'new_col' column; `df` itself is not changed.
new_df = df.assign(new_col=random_values)
# Add a 'name' column whose values are initialised to the row index.
new_df['name'] = new_df.index
new_df.head()

Unnamed: 0,A,new_col,name
0,4,0,0
1,5,3,1
2,6,6,2
3,7,4,3


## Changing Data

#### Renaming Columns

In [None]:
# Show the frame before any renaming.
print(df)

# Pair every current column label with its replacement.
old_names = ('A', 'B', 'C')
new_names = ('new_column_1', 'new_column_2', 'new_column_3')
newcols = dict(zip(old_names, new_names))

# Relabel the columns on `df` itself (inplace=True, so nothing is returned).
df.rename(columns=newcols, inplace=True)

# Display the frame with row label 1 renamed to 'a'. This call is not in place
# and its result is not assigned, so `df` keeps its original index.
df.rename(index={1: 'a'})

#### Replacing String Patterns with Regex

In [28]:
# Build a 3x3 frame of digit strings in which three cells end with a newline.
arr = [[str(value) for value in row] for row in np.arange(1, 10).reshape(3, 3)]
for row_pos, col_pos in [(0, 1), (1, 0), (2, 2)]:
    arr[row_pos][col_pos] += '\n'
df = pd.DataFrame(arr)
print("Before:\n", df)

# With regex=True the dict key is used as a pattern, so the newline is replaced
# wherever it occurs inside a cell; inplace=True modifies `df` itself.
df.replace({'\n': '<br>'}, regex=True, inplace=True)
print("After:\n", df)

Before:
      0    1    2
0    1  2\n    3
1  4\n    5    6
2    7    8  9\n
After:
        0      1      2
0      1  2<br>      3
1  4<br>      5      6
2      7      8  9<br>


In [None]:
# Show the frame as it currently stands.
print(df)
# Replace strings by others with `regex`: each newline becomes '<br>', and
# inplace=True applies the change to `df` itself.
df.replace({'\n': '<br>'}, regex=True, inplace=True)
# Show the frame again after the replacement.
print(df)

# Customizing Display Options 

In [11]:
# Two-row example frame: a column of long strings plus a numeric column.
columns = {
    'text': ['foo foo foo foo foo foo foo foo', 'bar bar bar bar bar'],
    'number': [1, 2],
}
df = pd.DataFrame(columns)
df.head()

Unnamed: 0,number,text
0,1,foo foo foo foo foo foo foo foo
1,2,bar bar bar bar bar


In [14]:
# Set the `width: 800px` property on the cells of the 'text' column only.
# NOTE(review): the Styler returned here is not the cell's last expression, so
# it is not what the cell displays — confirm whether that is intended.
df.style.set_properties(subset=['text'], **{'width': '800px'})
# Print the documentation of Styler.set_properties.
np.info(df.style.set_properties)

 set_properties(subset=None, **kwargs)

Convience method for setting one or more non-data dependent
properties or each cell.

.. versionadded:: 0.17.1

Parameters
----------
subset: IndexSlice
    a valid slice for ``data`` to limit the style application to
kwargs: dict
    property: value pairs to be set for each cell

Returns
-------
self : Styler

Examples
--------
>>> df = pd.DataFrame(np.random.randn(10, 4))
>>> df.style.set_properties(color="white", align="right")
>>> df.style.set_properties(**{'background-color': 'yellow'})


# Saving and Loading Data

In [44]:
import numpy as np
import pandas as pd

# Two equally long lists of example values; each becomes one column of `df`.
loss = [0.12, 0.165]
learning_rate = [0.5, 0.1]

columns = {"loss": loss, "learning_rate": learning_rate}
df = pd.DataFrame(columns)
df.head()

Unnamed: 0,learning_rate,loss
0,0.5,0.12
1,0.1,0.165


#### Creating/Appending and Saving DF to File
Creates the file if it does not exist; otherwise appends to the existing one. Useful for repeated updates to data files.

In [45]:
# Append `df` to io_test.csv, creating the file on the first run.
#
# The header row is written only when the file is missing or empty. On every
# later run only data rows are added, so re-running the cell never writes the
# rows twice in one go and never puts a second header in the middle of the file.
csv_path = 'io_test.csv'
write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

# mode='a' creates the file when needed and otherwise appends to it.
df.to_csv(csv_path, mode='a', header=write_header)

In [46]:
%%bash
# Print the contents of the CSV file written by the previous cell.
cat io_test.csv

,learning_rate,loss
0,0.5,0.12
1,0.1,0.165


#### Loading From CSV Into DF

In [47]:
# Read the CSV back into a DataFrame, using the file's first column as the index.
df_load = pd.read_csv('io_test.csv', index_col=0)
# Display the first rows of the loaded frame.
df_load.head()

Unnamed: 0,learning_rate,loss
0,0.5,0.12
1,0.1,0.165


In [48]:
# Add one more row to the loaded frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the new
# row is wrapped in a one-row frame and joined on with pd.concat instead.
new_row = pd.DataFrame([{"loss": 200, "learning_rate": 0.01}])
# ignore_index=True renumbers the rows 0..n-1 rather than keeping each frame's own labels.
df_load = pd.concat([df_load, new_row], ignore_index=True)
df_load.head()

Unnamed: 0,learning_rate,loss
0,0.5,0.12
1,0.1,0.165
2,0.01,200.0
