# This is a collection of Python snippets

## Working with Widgets in Notebooks

https://medium.com/@jdchipox/how-to-interact-with-jupyter-33a98686f24e

### Containers

In [6]:
# Display helpers that are handy alongside widgets
from IPython.display import display, Markdown, clear_output
# The widget toolkit itself
import ipywidgets as widgets

# --- Individual input widgets, reused by the container examples further down ---

# Single-line text input, pre-filled with a default string
text = widgets.Text(value='My Text', description='Title')

# Date picker; no date is selected initially
calendar = widgets.DatePicker(description='Select Date')

# Float slider running from 0 to 10 in steps of 0.1, starting at 1
slider = widgets.FloatSlider(value=1, min=0, max=10.0, step=0.1)

# Drop-down menu with three colour choices, 'red' pre-selected
menu = widgets.Dropdown(
    options=['red', 'blue', 'green'],
    value='red',
    description='Color:',
)

# Checkbox, left at its default state
checkbox = widgets.Checkbox(description='Check to invert')

In [7]:
# Any widget can be shown on its own: when a widget object is the last
# expression of a cell, it is rendered as that cell's output.
calendar

DatePicker(value=None, description='Select Date')

In [8]:
# Container widgets lay out several widgets together, and containers can be
# nested inside one another.

# Stack the individual widgets vertically.
box = widgets.VBox([text, slider, menu, calendar, checkbox])

# Nest the vertical box twice inside a horizontal box; both slots hold the same
# `box` object. Only the last expression of a cell is rendered, so this HBox is
# what appears as the cell output.
widgets.HBox([box, box])

HBox(children=(VBox(children=(Text(value='My Text', description='Title'), FloatSlider(value=1.0, max=10.0), Dr…

### A Tabbed container

In [9]:
# A Tab container: each child widget becomes one page of the tab view.

# The list of page contents. Both entries are the same `box` object, so the
# second page is not an independent copy despite its title.
children = [box, box]
# Create an empty tab container
tab = widgets.Tab()
# Assign the pages
tab.children = children
# Set the titles of the first and second page (pages are indexed from 0)
tab.set_title(0, 'box')
tab.set_title(1, 'copy of box')

# Last expression of the cell: renders the tab container
tab

Tab(children=(VBox(children=(Text(value='My Text', description='Title'), FloatSlider(value=1.0, max=10.0), Dro…

In [10]:
# Reading widget state: every widget exposes its current content as `.value`.

# Report the current value and its Python type for the text, checkbox and
# calendar widgets, one line per widget.
for message, widget in (
    ('Widget text has value {}, of {}', text),
    ('Widget checkbox has value {}, of {}', checkbox),
    ('Widget calendar has value {}, of {}', calendar),
):
    current = widget.value
    print(message.format(current, type(current)))

Widget text has value My Text, of <class 'str'>
Widget checkbox has value False, of <class 'bool'>
Widget calendar has value None, of <class 'NoneType'>


### Dataset explorer and printing values

In [None]:
# Exploring a dataset

import pandas as pd
import math
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown
# Load the training data directly from its URL (needs network access).
df = pd.read_csv('https://raw.githubusercontent.com/diegopenilla/Some_Data/master/Downloads/trainingData.csv')



# DROPDOWN MENU FOR SELECTING USER-ID
# Sort the distinct ids so the menu order is stable between runs (iterating a
# bare set gives no ordering guarantee).
user_ids = sorted(set(df['USERID']))
users = widgets.Dropdown(
    options=user_ids,
    # Keep user 1 as the default when it exists; otherwise fall back to the
    # first available id instead of failing on a value that is not an option.
    value=1 if 1 in user_ids else (user_ids[0] if user_ids else None),
    description='USER-ID:',
    disabled=False)

# Module-level placeholder for {building(s): floor(s)}. The click callback
# assigns its own local `dicti`, so this object is never filled in.
dicti = {}

# Button to display info about user, plus the output area its callback writes into
buttonuser_info = widgets.Button(description='Show Info')
outuser_info = widgets.Output()
def _path_length(latitudes, longitudes):
    """Return the summed straight-line distance between consecutive points.

    Points are formed by pairing `latitudes` and `longitudes` element-wise and
    are walked in the order given. Fewer than two points yields 0.
    """
    points = list(zip(latitudes, longitudes))
    return sum(
        math.hypot(nxt[0] - cur[0], nxt[1] - cur[1])
        for cur, nxt in zip(points, points[1:])
    )


def _floors_by_building(selection):
    """Map each BUILDINGID in `selection` to the sorted distinct FLOOR values recorded for it."""
    return {
        building: sorted(set(selection.loc[selection['BUILDINGID'] == building, 'FLOOR']))
        for building in sorted(set(selection['BUILDINGID']))
    }


def on_buttonuser_info_clicked(b):
    """Render a Markdown summary for the user currently selected in `users`.

    Reads the module-level `df` and `users`, and writes into `outuser_info`,
    replacing whatever that output area showed before.

    Args:
        b: the clicked button, as passed by `Button.on_click`; not used.
    """
    with outuser_info:
        clear_output()

        # The distance below is accumulated between consecutive rows, so the
        # rows must be in time order. A stable sort keeps the file order for
        # rows that share a timestamp.
        selection = df[df['USERID'] == users.value].sort_values('TIMESTAMP', kind='mergesort')

        # Nothing to summarise: say so instead of failing on the lookups below.
        if selection.empty:
            display(Markdown('No observations found for User {}'.format(users.value)))
            return

        # {building: [floors]} visited by this user.
        dicti = _floors_by_building(selection)

        # Horizontal distance travelled, from one observation to the next.
        # NOTE(review): the message reports metres, so the coordinates are
        # presumably metric rather than degrees — confirm.
        distance_travelled = _path_length(selection["LATITUDE"], selection["LONGITUDE"])

        # NOTE(review): 9 is presumably the number of non-WAP metadata columns
        # in the dataset — confirm against the CSV schema.
        wap_count = selection.shape[1] - 9

        # List every phone id seen for the user; with a single phone this
        # prints exactly that id.
        phones = ', '.join(str(phone) for phone in sorted(set(selection['PHONEID'])))

        # display, the information in Markdown to make it look better.
        display(Markdown('''For User {}, there are ${}$ observations from ${}$ WAP (*Wireless Access Points*)
- Beginning in {} and ending in {}
- Using smartphone {}
- The user travelled {:.2f} $m$ (does not take into account vertical movemement, change of floors)
- Visited places {}
- In {} locations with different SPACEID'''.format(users.value, selection.shape[0], wap_count,
                                                   str(selection['TIMESTAMP'].min()), str(selection['TIMESTAMP'].max()),
                                                   phones, distance_travelled,
                                                   str(dicti), len(set(selection['SPACEID'])))))

# Register the callback: clicking buttonuser_info runs on_buttonuser_info_clicked
buttonuser_info.on_click(on_buttonuser_info_clicked)

# Show the user selector, the button and the output area stacked vertically
display(widgets.VBox(children=[users, buttonuser_info, outuser_info]))

### Filter a dataframe by value in a column

In [2]:
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# read in the csv file (the path is relative to the notebook's working directory)
# NOTE(review): this rebinds the global `df`. The dataset-explorer callback
# (on_buttonuser_info_clicked) reads `df` when its button is clicked, so it
# will see this frame instead of the training data once this cell has run.
df = pd.read_csv('pivot_raw_data.csv')

# preview the first rows
df.head()

Unnamed: 0,Date_of_Week,Year,Month,Channel,Brand,Country,Category,Partnership,NMV,Item,ASP,PV
0,18/2/18,2018,Feb,LLLDD,AV,TH,Health & Beauty,MP,18853.22964,1243.42604,15.162325,41129
1,28/1/18,2018,Jan,LLLDD,AV,SG,Health & Beauty,MP,0.0,0.0,,440
2,14/1/18,2018,Jan,LLLDD,AV,TH,Health & Beauty,MP,17608.25665,1213.82701,14.506397,43215
3,4/2/18,2018,Feb,LLLDD,AV,SG,Health & Beauty,MP,0.0,0.0,,564
4,21/1/18,2018,Jan,LLLDD,AV,TH,Health & Beauty,MP,18179.10355,1245.011,14.601561,44818


In [None]:
@interact
def show_nmv_more_than(column=['ASP', 'NMV', 'PV', 'Item'],
                       x=(10, 100000, 20000)):
    """Return the rows of `df` whose value in `column` is strictly greater than `x`.

    The defaults are widget abbreviations consumed by `interact`: the list
    supplies the choices offered for `column`, and the (min, max, step) tuple
    supplies the range offered for `x`.
    """
    exceeds_threshold = df[column] > x
    return df[exceeds_threshold]

### Widgets for Plots

## Show installed versions/dependencies

In [3]:
# Version string of the pandas package loaded in this kernel
pd.__version__

'0.24.1'

In [4]:
# Print pandas' report of the Python/OS environment and the versions of its dependencies
pd.show_versions()


INSTALLED VERSIONS
------------------
commit: None
python: 3.6.7.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 142 Stepping 9, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.24.1
pytest: 4.0.2
pip: 19.1.1
setuptools: 41.0.1
Cython: 0.29.5
numpy: 1.17.1
scipy: 1.3.1
pyarrow: None
xarray: None
IPython: 7.6.1
sphinx: 1.8.4
patsy: 0.5.1
dateutil: 2.7.3
pytz: 2018.7
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 3.1.0
openpyxl: 2.5.12
xlrd: 1.2.0
xlwt: None
xlsxwriter: 1.1.2
lxml.etree: 4.2.5
bs4: 4.6.3
html5lib: None
sqlalchemy: 1.2.18
pymysql: None
psycopg2: None
jinja2: 2.10.1
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.7.0
gcsfs: None


## Rename columns

The most flexible method for renaming columns is the `rename()` method. You pass it a dictionary in which the keys are the old names and the values are the new names, and you also specify the axis:

In [5]:
# rename() returns a new DataFrame, so the result is assigned back to `df`.
# NOTE(review): 'col one' / 'col two' are not among the columns shown by
# df.head() earlier; rename() skips missing keys by default, so this
# presumably leaves `df` unchanged — confirm.
df = df.rename({'col one':'col_one', 'col two':'col_two'}, axis='columns')

The best thing about this method is that you can use it to rename any number of columns, whether it be just one column or all columns.

Now if you're going to rename all of the columns at once, a simpler method is just to overwrite the columns attribute of the DataFrame:

In [None]:
# df.columns = ['col_one', 'col_two']

## Replace column spaces with underscores, add prefixes and suffixes

In [12]:
# Replace spaces in the column labels with underscores (literal match, not a regex).
# NOTE: assigning to df.columns modifies `df` in place.
df.columns = df.columns.str.replace(' ', '_', regex=False)

# add_prefix() / add_suffix() return new DataFrames and leave `df` untouched.
# Only the last expression of a cell is rendered automatically, so the prefixed
# preview is shown explicitly with display(); otherwise its result is discarded.
display(df.add_prefix('X_').head())
df.add_suffix('_Y').head()

Unnamed: 0,Date_of_Week_Y,Year_Y,Month_Y,Channel_Y,Brand_Y,Country_Y,Category_Y,Partnership_Y,NMV_Y,Item_Y,ASP_Y,PV_Y
0,18/2/18,2018,Feb,LLLDD,AV,TH,Health & Beauty,MP,18853.22964,1243.42604,15.162325,41129
1,28/1/18,2018,Jan,LLLDD,AV,SG,Health & Beauty,MP,0.0,0.0,,440
2,14/1/18,2018,Jan,LLLDD,AV,TH,Health & Beauty,MP,17608.25665,1213.82701,14.506397,43215
3,4/2/18,2018,Feb,LLLDD,AV,SG,Health & Beauty,MP,0.0,0.0,,564
4,21/1/18,2018,Jan,LLLDD,AV,TH,Health & Beauty,MP,18179.10355,1245.011,14.601561,44818


## Select columns by dtype

In [14]:
# Keep only the columns with dtype `object` and preview the first rows
df.select_dtypes(include='object').head()

Unnamed: 0,Date_of_Week,Month,Channel,Brand,Country,Category,Partnership
0,18/2/18,Feb,LLLDD,AV,TH,Health & Beauty,MP
1,28/1/18,Jan,LLLDD,AV,SG,Health & Beauty,MP
2,14/1/18,Jan,LLLDD,AV,TH,Health & Beauty,MP
3,4/2/18,Feb,LLLDD,AV,SG,Health & Beauty,MP
4,21/1/18,Jan,LLLDD,AV,TH,Health & Beauty,MP


In [15]:
# Keep only the numeric columns and preview the first rows
df.select_dtypes(include='number').head()

Unnamed: 0,Year,NMV,Item,ASP,PV
0,2018,18853.22964,1243.42604,15.162325,41129
1,2018,0.0,0.0,,440
2,2018,17608.25665,1213.82701,14.506397,43215
3,2018,0.0,0.0,,564
4,2018,18179.10355,1245.011,14.601561,44818


## Convert strings to numbers

In [17]:
# Convert NMV and PV to float and list the resulting dtypes. The converted
# frame is not assigned to anything, so `df` itself is left as it was.
df.astype({'NMV':'float', 'PV':'float'}).dtypes

Date_of_Week     object
Year              int64
Month            object
Channel          object
Brand            object
Country          object
Category         object
Partnership      object
NMV             float64
Item            float64
ASP             float64
PV              float64
dtype: object

However, this would have resulted in an error if you tried to use it on a column containing non-numeric text — for example, one that uses a dash to represent zero — because pandas doesn't understand how to handle such values.

Instead, you can use the `to_numeric()` function on such a column (here, `ASP`) and tell it to convert any invalid input into `NaN` values:

In [16]:
# Parse the ASP column as numbers; entries that cannot be parsed become NaN
pd.to_numeric(df['ASP'], errors='coerce')

0       15.162325
1             NaN
2       14.506397
3             NaN
4       14.601561
5       17.776520
6             NaN
7             NaN
8       15.509837
9             NaN
10            NaN
11      15.281041
12      15.226416
13      14.722315
14      14.486268
15      12.503173
16            NaN
17      12.503173
18      12.503173
19            NaN
20            NaN
21      15.561198
22      14.714657
23      15.185031
24      14.945724
25      14.343283
26       1.993718
27       2.733806
28       1.713752
29       3.289496
          ...    
3046     1.140962
3047          NaN
3048          NaN
3049     5.913965
3050     1.798230
3051     1.706737
3052     1.433179
3053     1.235222
3054     1.738556
3055     1.794630
3056     1.433179
3057     1.696422
3058     1.232084
3059     1.717870
3060     1.870599
3061     1.433179
3062     1.433179
3063     1.266001
3064     1.430012
3065     1.499932
3066     1.626143
3067     1.455844
3068     1.707713
3069     1.119830
3070     1