# Testing functions in epydemiology

## Import epydemiology
(All other packages will be imported or reported missing.)

In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd

import epydemiology as epy

Error: pyxdameraulevenshtein package not installed. Some features may not be available.


## Some background details

In [None]:
# Show the package-level help text for the imported epydemiology module.
help(epy)

In [None]:
# List every name exposed in the epydemiology namespace.
print(dir(epy))

---

## FILE: phjGetData

### FUNCTION: phjReadDataFromExcelNamedCellRange()

In [None]:
# Folder and file name of the Excel workbook that holds the named test ranges.
# NOTE(review): this is an absolute, machine-specific path – adjust it before
# running the cell on another machine.
phjPath = "/Users/philipjones/Documents/git_repositories/epydemiology"
phjFileName = "Test data.xlsx"

import pandas as pd
import openpyxl
import epydemiology as epy

# First named range, read with the "%d%b%Y" datetime format.
print("RANGE: some_test_data")
print("=====================")
myDF = epy.phjReadDataFromExcelNamedCellRange(phjExcelPathAndFileName = '/'.join([phjPath,phjFileName]),
                                              phjExcelCellRangeName = 'some_test_data',
                                              phjDatetimeFormat = "%d%b%Y",
                                              phjMissingValue = "missing",
                                              phjHeaderRow = True,
                                              phjPrintResults = True)

print(myDF.dtypes)

print('\n')

# Second named range, read with the "%Y-%m-%d" datetime format.
print("RANGE: some_more_test_data")
print("==========================")
myDF2 = epy.phjReadDataFromExcelNamedCellRange(phjExcelPathAndFileName = '/'.join([phjPath,phjFileName]),
                                               phjExcelCellRangeName = 'some_more_test_data',
                                               phjDatetimeFormat = "%Y-%m-%d",
                                               phjMissingValue = "missing",
                                               phjHeaderRow = True,
                                               phjPrintResults = True)

# Report the dtypes of the SECOND dataframe; the cell previously re-printed
# myDF.dtypes here, so the dtypes of myDF2 were never shown.
print(myDF2.dtypes)

---

## FILE: phjGetDBData

### FUNCTION: phjConnectToDatabase()

In [None]:
# Database driver packages are imported before the connection helper is called.
# NOTE(review): presumably these are the drivers epydemiology relies on for
# MySQL and SQL Server connections – confirm against the package.
import pymysql
import pymssql
import epydemiology as epy

# Request a connection, passing 'mysql' as the database type.
tempConn = epy.phjConnectToDatabase('mysql')

# Display whatever the helper returned.
# NOTE(review): the returned object is never closed in this cell – confirm
# whether an explicit close is needed.
print(tempConn)

### FUNCTION: phjGetDataFromDatabase()

In [None]:
# Variant 1: supply the query through phjQueryPathAndFile.
# The path below is a placeholder and must be replaced with a real file.
myDF = epy.phjGetDataFromDatabase(phjQueryPathAndFile = '/path_to_directory/theSQLQueryFile.mysql',
                                  phjPrintResults = True)

# Variant 2: supply the query text directly through phjQueryStr.
# Note that myDF is rebound here, replacing the result of the first call.
myDF = epy.phjGetDataFromDatabase(phjQueryStr = 'SELECT * FROM Table1',
                                  phjPrintResults = True)

---

## FILE: phjMiscFuncs

### FUNCTION: phjGetStrFromArgOrFile()

### FUNCTION: phjReadTextFromFile()

### FUNCTION: phjCreateNameGroupRegex()

### FUNCTION: phjFindRegexNamedGroup()

### FUNCTION: phjMaxLevelOfTaxonomicDetail()

### FUNCTION: phjReverseMap()

#### Example 1 – exact string matches

In [None]:
# Example data: 'var' holds the raw spellings that will be mapped. The frame
# also carries an 'id' column and a column that is itself named 'dog'.
myDF = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5, 6, 7],
        'var': ['dogg', 'canine', 'cannine', 'catt', 'felin', 'cot', 'feline'],
        'dog': [1, 2, 3, 4, 5, 6, 7],
    }
)

print(myDF)

# Mapping dictionary: each key lists the exact raw strings that belong to it.
# ('cot' appears in the data but in neither list.)
d = {
    'dog': ['dogg', 'canine', 'cannine'],
    'cat': ['catt', 'felin', 'feline'],
}

In [None]:
# Map the raw strings in 'var' to the dictionary keys, treating the dictionary
# values as literal strings (phjTreatAsRegex = False). The result is requested
# in a new variable called 'spp'.
# NOTE(review): myDF is rebound to the returned dataframe, so this cell depends
# on the preceding set-up cell having been run first.
myDF = epy.phjReverseMap(phjDF = myDF,
                         phjMappingDict = d,
                         phjCategoryVarName = 'var',
                         phjMappedVarName = 'spp',
                         phjUnmapped = 'missing',
                         phjTreatAsRegex = False,
                         phjDropPreExisting = True,
                         phjPrintResults = True)

#### Example 2 – regex

In [None]:
# Same example data as the exact-match example: raw spellings live in 'var'.
myDF = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5, 6, 7],
        'var': ['dogg', 'canine', 'cannine', 'catt', 'felin', 'cot', 'feline'],
        'dog': [1, 2, 3, 4, 5, 6, 7],
    }
)

print(myDF)
print('\n')

# Mapping dictionary: here each key lists regular-expression patterns rather
# than literal strings.
d = {
    'dog': ['(?:dog+)', '(?:can*ine)'],
    'cat': ['(?:cat+)', '(?:fel+ine?)'],
}

print(d)

In [None]:
# Map the raw strings in 'var' to the dictionary keys, this time treating the
# dictionary values as regular expressions (phjTreatAsRegex = True). The result
# is requested in a new variable called 'new'.
# NOTE(review): myDF is rebound to the returned dataframe, so this cell depends
# on the preceding set-up cell having been run first.
myDF = epy.phjReverseMap(phjDF = myDF,
                         phjMappingDict = d,
                         phjCategoryVarName = 'var',
                         phjMappedVarName = 'new',
                         phjUnmapped = 'missing',
                         phjTreatAsRegex = True,
                         phjDropPreExisting = True,
                         phjPrintResults = True)

### FUNCTION: phjRetrieveUniqueFromMultiDataFrames()

This function takes one or more Pandas dataframes and returns a dataframe containing the unique values from the listed variables.
```python
phjOutDF = epy.phjRetrieveUniqueFromMultiDataFrames(phjDFList,
                                                    phjVarNameList,
                                                    phjSort = True,
                                                    phjPrintResults = False)
                                                    
phjDFList – list of dataframes
phjVarNameList – list of variable names. Single column can be entered as a string.
phjSort (default = True) – sort returned dataframe
phjPrintResults (default = False) – print results
```

#### Single dataframe

In [4]:
# Small frame with repeated rows; its only columns are 'a' and 'b'.
phjTempDF = pd.DataFrame(
    {
        'a': [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6],
        'b': ['a', 'b', 'c', 'd', 'e', 'f', 'a', 'b', 'w', 'd', 'e', 'f'],
    }
)
print('Original', '--------', sep='\n')
print(phjTempDF)
print('\n')

# Call 1: the dataframe is wrapped in a list and the variable name is passed
# as a plain string.
# NOTE(review): 'c' is not a column of phjTempDF; the recorded output shows an
# AssertionError message and a None result. Presumably this is a deliberate
# failure test – confirm, or change the name to an existing column.
phjOutDF = epy.phjRetrieveUniqueFromMultiDataFrames(
    phjDFList=[phjTempDF],
    phjVarNameList='c',
    phjSort=True,
    phjPrintResults=False,
)

print('Out1', '----', sep='\n')
print(phjOutDF)
print('\n')

# Call 2: the dataframe is passed bare (not inside a list) together with a
# list of names that again includes the non-existent column 'c'.
# NOTE(review): the recorded output reports the same AssertionError and None.
phjOutDF = epy.phjRetrieveUniqueFromMultiDataFrames(
    phjDFList=phjTempDF,
    phjVarNameList=['a', 'b', 'c'],
    phjSort=True,
    phjPrintResults=False,
)

print('Out2', '----', sep='\n')
print(phjOutDF)

Original
--------
    a  b
0   1  a
1   2  b
2   3  c
3   4  d
4   5  e
5   6  f
6   1  a
7   2  b
8   3  w
9   4  d
10  5  e
11  6  f


An AssertionError occurred in phjRetrieveUniqueFromMultiDataFrames() function. (The elements in 'phjVarNameList' ('['c']') do not all exist in list of columns.)

Out1
----
None


An AssertionError occurred in phjRetrieveUniqueFromMultiDataFrames() function. (The elements in 'phjVarNameList' ('['a', 'b', 'c']') do not all exist in list of columns.)

Out2
----
None


#### Multiple dataframes of data

In [None]:
# Two frames sharing the columns 'm' and 'n'; some rows occur in both.
df1 = pd.DataFrame(
    {
        'm': [1, 2, 3, 4, 5, 6],
        'n': ['a', 'b', 'c', 'd', 'e', 'f'],
    }
)

df2 = pd.DataFrame(
    {
        'm': [2, 5, 7, 8],
        'n': ['b', 'e', 'g', 'h'],
    }
)

print('First dataframe', '---------------', sep='\n')
print(df1)
print('\n')
print('Second dataframe', '----------------', sep='\n')
print(df2)
print('\n')

# Ask for the unique values of 'm' and 'n' across both frames, sorted.
phjOutDF = epy.phjRetrieveUniqueFromMultiDataFrames(
    phjDFList=[df1, df2],
    phjVarNameList=['m', 'n'],
    phjSort=True,
    phjPrintResults=False,
)

print('Dataframe of unique values', '--------------------------', sep='\n')
print(phjOutDF)

### FUNCTION: phjUpdateLUT()

This function takes two dataframes: one considered to contain existing values (in a database, for example) and a second containing a new set of values. The function produces an updated dataframe, as demonstrated in the examples below.

#### Testing phjUpdateLUT() function with dataframe with single column

In [None]:
# "Existing" values: ids 1-6 paired with the letters a-f.
old_df = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5, 6],
        'm': ['a', 'b', 'c', 'd', 'e', 'f'],
    }
)

# "New" values: two letters already present in old_df ('b', 'e') and two that
# are not ('g', 'h').
new_df = pd.DataFrame(
    {
        'id': [1, 2, 3, 4],
        'm': ['b', 'e', 'g', 'h'],
    }
)

# Update using the single variable 'm', with 'id' as the identifier column.
update_df = epy.phjUpdateLUT(
    phjExistDF=old_df,
    phjNewDF=new_df,
    phjIDName='id',
    phjVarName=['m'],
    phjMissStr='missing',
    phjMissCode=999,
    phjPrintResults=True,
)

print('Updated dataframe', '-----------------', sep='\n')
print(update_df)

#### Testing phjUpdateLUT() function with dataframe with multiple columns

In [None]:
# "Existing" values: ids 1-6 with a lower-case ('m') and upper-case ('n') letter.
old_df = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5, 6],
        'm': ['a', 'b', 'c', 'd', 'e', 'f'],
        'n': ['A', 'B', 'C', 'D', 'E', 'F'],
    }
)

# "New" values: a mix of ('m', 'n') pairs that do and do not occur in old_df.
new_df = pd.DataFrame(
    {
        'id': [1, 2, 3, 4],
        'm': ['b', 'e', 'g', 'h'],
        'n': ['BB', 'E', 'GG', 'H'],
    }
)

# Update using both variables 'm' and 'n', with 'id' as the identifier column.
update_df = epy.phjUpdateLUT(
    phjExistDF=old_df,
    phjNewDF=new_df,
    phjIDName='id',
    phjVarName=['m', 'n'],
    phjMissStr='missing',
    phjMissCode=999,
    phjPrintResults=True,
)

print('Updated dataframe', '-----------------', sep='\n')
print(update_df)

---

## FILE: phjMatrices

### FUNCTION: phjBinaryVarsToSquareMatrix()

#### Output a numpy array

In [None]:
# Eight rows of 0/1 indicators in five columns.
rawDataDF = pd.DataFrame(
    {
        'a': [0, 1, 1, 1, 0, 0, 1, 0],
        'b': [1, 1, 0, 0, 1, 0, 0, 1],
        'c': [0, 0, 1, 0, 1, 1, 1, 1],
        'd': [1, 0, 0, 0, 1, 0, 0, 0],
        'e': [1, 0, 0, 0, 0, 1, 0, 0],
    }
)

# Columns to include in the matrix.
columns = ['a', 'b', 'c', 'd', 'e']

print('Raw data')
print(rawDataDF)
print('\n')

# Request the result in 'arr' output format.
phjMatrix = epy.phjBinaryVarsToSquareMatrix(
    phjDataDF=rawDataDF,
    phjColumnNamesList=columns,
    phjOutputFormat='arr',
    phjPrintResults=False,
)

print('Returned square matrix')
print(phjMatrix)

#### Output a Pandas dataframe

In [None]:
# Eight rows of 0/1 indicators in five columns.
rawDataDF = pd.DataFrame(
    {
        'a': [0, 1, 1, 1, 0, 0, 1, 0],
        'b': [1, 1, 0, 0, 1, 0, 0, 1],
        'c': [0, 0, 1, 0, 1, 1, 1, 1],
        'd': [1, 0, 0, 0, 1, 0, 0, 0],
        'e': [1, 0, 0, 0, 0, 1, 0, 0],
    }
)

# Columns to include in the matrix.
columns = ['a', 'b', 'c', 'd', 'e']

print('Raw data')
print(rawDataDF)
print('\n')

# Request the result in 'df' output format.
phjMatrix = epy.phjBinaryVarsToSquareMatrix(
    phjDataDF=rawDataDF,
    phjColumnNamesList=columns,
    phjOutputFormat='df',
    phjPrintResults=False,
)

print('Returned square matrix')
print(phjMatrix)

### FUNCTION: phjLongToWideBinary()

---

## FILE: phjCalculateProportions

### FUNCTION: phjCalculateBinomialProportions()

In [None]:
# Example: binomial proportions with phjCalculateBinomialProportions()
# ---------------------------------------------------------------------

# Example dataset: a grouping column plus three yes/no columns, each of which
# contains some missing (NaN) entries.
phjTempDF = pd.DataFrame(
    {
        'group': ['g1', 'g1', 'g2', 'g1', 'g2', 'g2', 'g1', 'g1', 'g2', 'g1'],
        'A': ['yes', 'yes', 'no', 'no', 'no', 'no', 'no', 'yes', np.nan, 'yes'],
        'B': ['no', np.nan, np.nan, 'yes', 'yes', 'yes', 'yes', 'no', 'no', 'no'],
        'C': ['yes', 'yes', 'yes', np.nan, 'no', 'yes', 'yes', 'yes', 'no', 'no'],
    }
)

print(phjTempDF)
print('\n')

# 'yes' counts as a success; Wilson intervals at alpha = 0.05; plot all groups.
phjPropDF = epy.phjCalculateBinomialProportions(
    phjTempDF=phjTempDF,
    phjColumnsList=['A', 'B', 'C'],
    phjSuccess='yes',
    phjGroupVarName='group',
    phjMissingValue='missing',
    phjBinomialConfIntMethod='wilson',
    phjAlpha=0.05,
    phjPlotProportions=True,
    phjGroupsToPlotList='all',
    phjSortProportions=True,
    phjGraphTitle=None,
    phjPrintResults=False,
)

print(phjPropDF)

### FUNCTION: phjCalculateBinomialConfInts()

### FUNCTION: phjCalculateMultinomialProportions()

In [None]:
# Example: multinomial proportions with phjCalculateMultinomialProportions()
# ---------------------------------------------------------------------------

# Example dataset: case/control group, a breed category and an integer column.
# Both 'group' and 'category' contain NaN entries, and 'category' also holds
# the literal string 'missing'.
phjTempDF = pd.DataFrame(
    {
        'group': [
            'case', 'case', 'case', 'control', 'control', 'case', 'case',
            'case', 'control', 'control', 'control', 'control', 'case',
            'case', 'case', 'control', 'control', 'control', 'control',
            'case', 'case', 'case', 'case', 'case', np.nan, np.nan,
        ],
        'category': [
            np.nan, 'spaniel', 'missing', 'terrier', 'collie', 'labrador',
            'labrador', 'collie', 'spaniel', 'spaniel', 'labrador', 'collie',
            'terrier', 'terrier', 'terrier', 'collie', 'labrador', 'labrador',
            'labrador', 'spaniel', 'spaniel', 'collie', 'collie', 'collie',
            'terrier', 'spaniel',
        ],
        'catint': [
            1, 2, 3, 2, 3, 2, 1, 2, 1, 2, 3, 2, 3,
            2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 3, 1, 2,
        ],
    }
)

print(phjTempDF)
print('\n')

# Goodman intervals at alpha = 0.05, plotting all categories.
phjRelFreqDF = epy.phjCalculateMultinomialProportions(
    phjTempDF=phjTempDF,
    phjCategoryVarName='category',
    phjGroupVarName='group',
    phjMissingValue='missing',
    phjMultinomialConfIntMethod='goodman',
    phjAlpha=0.05,
    phjPlotRelFreq=True,
    phjCategoriesToPlotList='all',
    phjGroupsToPlotList='all',   # Currently not implemented
    phjGraphTitle='Relative frequencies (Goodman CI)',
    phjPrintResults=True,
)

print(phjRelFreqDF)

### FUNCTION: phjSummaryTableToBinaryOutcomes()

### FUNCTION: phjAnnualDiseaseTrend()

In [None]:
# Yearly counts of positive and negative results, 2008-2018.
phjDiseaseDF = pd.DataFrame(
    {
        'year': [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018],
        'positive': [18, 34, 24, 26, 30, 27, 36, 17, 18, 15, 4],
        'negative': [1695, 1733, 1929, 1517, 1449, 1329, 1130, 928, 753, 496, 325],
    }
)

# Only the rows before 2018 are passed to the function; the name phjDiseaseDF
# is then rebound to whatever the function returns.
phjDiseaseDF = epy.phjAnnualDiseaseTrend(
    phjDF=phjDiseaseDF.loc[phjDiseaseDF['year'] < 2018, :],
    phjYearVarName='year',
    phjPositivesVarName='positive',
    phjNegativesVarName='negative',
    phjTotalVarName=None,
    phjConfIntMethod='normal',
    phjAlpha=0.05,
    phjPlotProportions=True,
    phjPlotPrediction=True,
    phjGraphTitleStr=None,
    phjPrintResults=False,
)

---

## FILE: phjCleanUKPostcodes.py

### FUNCTION: phjCleanUKPostcodeVariable()

#### Clean postcodes based on format alone

In [None]:
import re

# Test dataframe: a mix of well-formed, badly spaced, mistyped and nonsense
# postcode strings. The remaining columns are pre-filled with NaN; some of them
# share names with the output variables requested below.
myTestPostcodeDF = pd.DataFrame(
    {
        'postcode': [
            'NP45DG',
            'CH647TE',
            'CH5 4HE',
            'GIR 0AA',
            'NOT NOWN',
            'GIR0AB',
            'NOR12A',
            'no idea',
            'W1A 1AA',
            'missin',
            'NP4  OGH',
            'P012 OLL',
            'p01s',
            'ABCD',
            '',
            'ab123cd',
            'un-known',
            'B1    INJ',
            'AB123CD',
            'No idea what the postcode is',
            '    ???NP4-5DG_*#   ',
        ],
        'pcdClean': np.nan,
        'pcd7': np.nan,
        'postcodeOutward': np.nan,
        'someOtherCol': np.nan,
    }
)

# Show the starting data, then clean it.
print('\nStart dataframe\n===============\n')
print(myTestPostcodeDF)
print('\n')

# Check by 'format' only: no series of real postcodes is supplied.
myTestPostcodeDF = epy.phjCleanUKPostcodeVariable(
    phjDF=myTestPostcodeDF,
    phjRealPostcodeSer=None,
    phjOrigPostcodeVarName='postcode',
    phjNewPostcodeVarName='pcdClean',
    phjNewPostcodeStrLenVarName='pcdCleanStrLen',
    phjPostcodeCheckVarName='pcdFormatCheck',
    phjMissingValueCode='missing',
    phjMinDamerauLevenshteinDistanceVarName='minDamLevDist',
    phjBestAlternativesVarName='bestAlternatives',
    phjPostcode7VarName='pcd7',
    phjPostcodeAreaVarName='pcdArea',
    phjSalvageOutwardPostcodeComponent=True,
    phjCheckByOption='format',
    phjDropExisting=True,
    phjPrintResults=True,
)

print('\nReturned dataframe\n==================\n')
print(myTestPostcodeDF)

#### Clean postcodes based on real postcode and identify closest matches

In [None]:
import re

# N.B. When calculating best alternative postcodes, only postcodes that are
# within 1 DL distance are considered.

# Stand-in for a series holding every real UK postcode.
realPostcodesSer = pd.Series(
    [
        'NP4 5DG',
        'CH647TE',
        'CH5 4HE',
        'W1A 1AA',
        'NP4 0GH',
        'PO120LL',
        'AB123CF',
        'AB124DF',
        'AB123CV',
    ]
)

# Test dataframe: a mix of well-formed, badly spaced, mistyped and nonsense
# postcode strings. The remaining columns are pre-filled with NaN; some of them
# share names with the output variables requested below.
myTestPostcodeDF = pd.DataFrame(
    {
        'postcode': [
            'NP45DG',
            'CH647TE',
            'CH5 4HE',
            'GIR 0AA',
            'NOT NOWN',
            'GIR0AB',
            'NOR12A',
            'no idea',
            'W1A 1AA',
            'missin',
            'NP4  OGH',
            'P012 OLL',
            'p01s',
            'ABCD',
            '',
            'ab123cd',
            'un-known',
            'B1    INJ',
            'AB123CD',
            'No idea what the postcode is',
            '    ???NP4-5DG_*#   ',
        ],
        'pcdClean': np.nan,
        'pcd7': np.nan,
        'postcodeOutward': np.nan,
        'someOtherCol': np.nan,
    }
)

# Show the starting data, then clean it.
print('\nStart dataframe\n===============\n')
print(myTestPostcodeDF)
print('\n')

# Check by 'dictionary': the series of real postcodes is supplied.
myTestPostcodeDF = epy.phjCleanUKPostcodeVariable(
    phjDF=myTestPostcodeDF,
    phjRealPostcodeSer=realPostcodesSer,
    phjOrigPostcodeVarName='postcode',
    phjNewPostcodeVarName='pcdClean',
    phjNewPostcodeStrLenVarName='pcdCleanStrLen',
    phjPostcodeCheckVarName='pcdFormatCheck',
    phjMissingValueCode='missing',
    phjMinDamerauLevenshteinDistanceVarName='minDamLevDist',
    phjBestAlternativesVarName='bestAlternatives',
    phjPostcode7VarName='pcd7',
    phjPostcodeAreaVarName='pcdArea',
    phjSalvageOutwardPostcodeComponent=True,
    phjCheckByOption='dictionary',
    phjDropExisting=True,
    phjPrintResults=True,
)

print('\nReturned dataframe\n==================\n')
print(myTestPostcodeDF)

### FUNCTION: phjPostcodeFormat7()

---

## FILE: phjCleanData

### FUNCTION: phjParseDateVar()

---

## FILE: phjExploreData

### FUNCTION: phjViewLogOdds()
Example of viewing log odds plotted against mid-point of categories.

#### Categorise using Jenks breaks and using 'yes' and 'no' as binary outcome

In [None]:
# Define example dataset: 50,000 'yes' and 50,000 'no' outcomes paired with a
# uniformly distributed continuous risk factor on [0, 1).
# A locally seeded generator is used (instead of the unseeded global
# np.random state) so the data, and therefore the output of this cell, are
# identical on every run.
phjTempDF = pd.DataFrame({'binDepVar':['yes']*50000 + ['no']*50000,
                          'riskFactorCont':np.random.RandomState(42).uniform(0,1,100000)})

# Show a truncated view of the input data.
with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print(phjTempDF)


# View log odds, categorising the risk factor into 5 groups using Jenks breaks
# and treating 'yes' as the case value. phjTempDF is rebound to the result.
phjTempDF = epy.phjViewLogOdds(phjTempDF = phjTempDF,
                               phjBinaryDepVarName = 'binDepVar',
                               phjContIndepVarName = 'riskFactorCont',
                               phjCaseValue = 'yes',
                               phjMissingValue = 'missing',
                               phjNumberOfCategoriesInt = 5,
                               phjNewCategoryVarName = 'categoricalVar',
                               phjCategorisationMethod = 'jenks',
                               phjGroupNameVar = None,
                               phjPrintResults = False)

with pd.option_context('display.max_rows', 10, 'display.max_columns', 10):
    print('Log odds for categorised variable')
    print(phjTempDF)

#### Categorise using quantile breaks and using 1 and 0 as binary outcome

In [None]:
# Define example dataset: 50,000 ones and 50,000 zeros as the binary outcome,
# paired with a uniformly distributed continuous risk factor on [0, 1).
# A locally seeded generator is used (instead of the unseeded global
# np.random state) so the data, and therefore the output of this cell, are
# identical on every run.
phjTempDF = pd.DataFrame({'binDepVar':[1]*50000 + [0]*50000,
                          'riskFactorCont':np.random.RandomState(42).uniform(0,1,100000)})

# Show a truncated view of the input data.
with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print(phjTempDF)


# View log odds, categorising the risk factor into 8 groups using quantile
# breaks and treating 1 as the case value. phjTempDF is rebound to the result.
phjTempDF = epy.phjViewLogOdds(phjTempDF = phjTempDF,
                               phjBinaryDepVarName = 'binDepVar',
                               phjContIndepVarName = 'riskFactorCont',
                               phjCaseValue = 1,
                               phjMissingValue = 'missing',
                               phjNumberOfCategoriesInt = 8,
                               phjNewCategoryVarName = 'categoricalVar',
                               phjCategorisationMethod = 'quantile',
                               phjGroupNameVar = None,
                               phjPrintResults = False)

with pd.option_context('display.max_rows', 10, 'display.max_columns', 10):
    print('Log odds for categorised variable')
    print(phjTempDF)

### FUNCTION: phjCategoriseContinuousVariable()

---

## FILE: phjRROR

### FUNCTION: phjOddsRatio()

### FUNCTION: phjRelativeRisk()

---

## FILE: phjSelectData.py

### FUNCTION: phjSelectCaseControlDataset()

#### Unmatched controls

In [None]:
# Five cases, all dogs.
casesDF = pd.DataFrame(
    {
        'animalID': [1, 2, 3, 4, 5],
        'var1': [43, 45, 34, 45, 56],
        'sp': ['dog', 'dog', 'dog', 'dog', 'dog'],
    }
)

# Twenty potential controls, a mix of dogs and cats.
potControlsDF = pd.DataFrame(
    {
        'animalID': [11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                     21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
        'var1': [34, 54, 34, 23, 34, 45, 56, 67, 56, 67,
                 78, 98, 65, 54, 34, 76, 87, 56, 45, 34],
        'sp': ['dog', 'cat', 'dog', 'dog', 'cat', 'dog', 'cat', 'dog', 'cat', 'dog',
               'dog', 'dog', 'dog', 'cat', 'dog', 'cat', 'dog', 'dog', 'dog', 'cat'],
    }
)

print("This dataframe contains all the cases of disease\n")
print(casesDF)
print("\n")
print("This dataframe contains all the animals you could potentially use as controls\n")
print(potControlsDF)
print("\n")

# Unmatched selection: no matching variables, two controls per case.
unmatchedDF = epy.phjSelectCaseControlDataset(
    phjCasesDF=casesDF,
    phjPotentialControlsDF=potControlsDF,
    phjUniqueIdentifierVarName='animalID',
    phjMatchingVariablesList=None,
    phjControlsPerCaseInt=2,
    phjPrintResults=False,
)

print(unmatchedDF)

#### Matched controls

In [None]:
# Five cases, all dogs.
casesDF = pd.DataFrame(
    {
        'animalID': [1, 2, 3, 4, 5],
        'var1': [43, 45, 34, 45, 56],
        'sp': ['dog', 'dog', 'dog', 'dog', 'dog'],
    }
)

# Twenty potential controls, a mix of dogs and cats.
potControlsDF = pd.DataFrame(
    {
        'animalID': [11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                     21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
        'var1': [34, 54, 34, 23, 34, 45, 56, 67, 56, 67,
                 78, 98, 65, 54, 34, 76, 87, 56, 45, 34],
        'sp': ['dog', 'cat', 'dog', 'dog', 'cat', 'dog', 'cat', 'dog', 'cat', 'dog',
               'dog', 'dog', 'dog', 'cat', 'dog', 'cat', 'dog', 'dog', 'dog', 'cat'],
    }
)

print("This dataframe contains all the cases of disease\n")
print(casesDF)
print("\n")
print("This dataframe contains all the animals you could potentially use as controls\n")
print(potControlsDF)
print("\n")

# Matched selection: controls are matched to cases on 'sp', two per case.
matchedDF = epy.phjSelectCaseControlDataset(
    phjCasesDF=casesDF,
    phjPotentialControlsDF=potControlsDF,
    phjUniqueIdentifierVarName='animalID',
    phjMatchingVariablesList=['sp'],
    phjControlsPerCaseInt=2,
    phjPrintResults=False,
)

print(matchedDF)

### FUNCTION: phjGenerateCaseControlDataset()

### FUNCTION: phjCollapseOnPatientID()

---

## FILE: phjCalculateProportions.py

---

## FILE: phjExploreData.py

### FUNCTION: phjCategoriseContinuousVariable()

#### Return dataframe alone

In [None]:
# Define example dataset: 50,000 'yes' and 50,000 'no' outcomes paired with a
# uniformly distributed continuous variable on [0, 1).
# A locally seeded generator is used (instead of the unseeded global
# np.random state) so the data, and therefore the output of this cell, are
# identical on every run.
phjTempDF = pd.DataFrame({'binDepVar':['yes']*50000 + ['no']*50000,
                          'riskFactorCont':np.random.RandomState(42).uniform(0,1,100000)})

# Show a truncated view of the input data.
with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print(phjTempDF)


# Categorise a continuous variable into 6 groups using Jenks breaks. With
# phjReturnBreaks = False a single object is assigned back to phjTempDF.
phjTempDF = epy.phjCategoriseContinuousVariable(phjTempDF = phjTempDF,
                                                phjContinuousVarName = 'riskFactorCont',
                                                phjMissingValue = 'missing',
                                                phjNumberOfCategoriesInt = 6,
                                                phjNewCategoryVarName = 'catVar',
                                                phjCategorisationMethod = 'jenks',
                                                phjReturnBreaks = False,
                                                phjPrintResults = False)

# The label previously read 'Log odds for categorised variable', copied from
# the phjViewLogOdds() example; this cell prints the categorised dataframe.
with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print('\nCategorised variable')
    print(phjTempDF)

#### Return dataframe and list of breaks

In [None]:
# Define example dataset: 50,000 'yes' and 50,000 'no' outcomes paired with a
# uniformly distributed continuous variable on [0, 1).
# A locally seeded generator is used (instead of the unseeded global
# np.random state) so the data, and therefore the output of this cell, are
# identical on every run.
phjTempDF = pd.DataFrame({'binDepVar':['yes']*50000 + ['no']*50000,
                          'riskFactorCont':np.random.RandomState(42).uniform(0,1,100000)})

# Show a truncated view of the input data.
with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print(phjTempDF)


# Categorise a continuous variable into 6 groups using Jenks breaks. With
# phjReturnBreaks = True the result is unpacked into the dataframe and the
# list of breaks.
phjTempDF, phjBreaksList = epy.phjCategoriseContinuousVariable(phjTempDF = phjTempDF,
                                                               phjContinuousVarName = 'riskFactorCont',
                                                               phjMissingValue = 'missing',
                                                               phjNumberOfCategoriesInt = 6,
                                                               phjNewCategoryVarName = 'catVar',
                                                               phjCategorisationMethod = 'jenks',
                                                               phjReturnBreaks = True,
                                                               phjPrintResults = False)

with pd.option_context('display.max_rows', 10, 'display.max_columns', 5):
    print('\nCategorised variable')
    print(phjTempDF)
    print('\n')
    print('Breaks')
    print(phjBreaksList)

---