# How Do I Make a Reproducible DataFrame from my Existing DataFrame?

In [1]:
import pandas as pd
from pprint import pprint as pp

## Create your DataFrame from a file

In [2]:
# Load the source CSV (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/2018-09-18_flavors_of_cacao.csv')

In [3]:
# Preview the first five rows (five is the default for head()).
df.head(n=5)

Unnamed: 0,Company (Maker-if known),Specific Bean Origin or Bar Name,REF,Review Date,Cocoa Percent,Company Location,Rating,Bean Type,Broad Bean Origin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru


## Pretty-print the column headers

In [4]:
# Print the column labels as a plain Python list so escapes such as '\n' and '\xa0' are visible.
pp(df.columns.tolist())

['Company\xa0\n(Maker-if known)',
 'Specific Bean Origin\nor Bar Name',
 'REF',
 'Review\nDate',
 'Cocoa\nPercent',
 'Company\nLocation',
 'Rating',
 'Bean\nType',
 'Broad Bean\nOrigin']


## Create a variable and assign it the printed headers, copied from the output above
### This assignment can then be copied and pasted into a Stack Overflow question

In [5]:
# Column labels, copied verbatim from the pretty-printed list of df.columns.
# The '\n' and '\xa0' escapes are part of the labels themselves and must be kept as-is.
sof_headers = [
    'Company\xa0\n(Maker-if known)',
    'Specific Bean Origin\nor Bar Name',
    'REF',
    'Review\nDate',
    'Cocoa\nPercent',
    'Company\nLocation',
    'Rating',
    'Bean\nType',
    'Broad Bean\nOrigin',
]

## Pretty-print a small range of the DataFrame values

In [6]:
# Print the first ten rows as a raw array so the cell values can be copied as literals.
pp(df.head(10).values)

array([['A. Morin', 'Agua Grande', 1876, 2016, '63%', 'France', 3.75,
        '\xa0', 'Sao Tome'],
       ['A. Morin', 'Kpime', 1676, 2015, '70%', 'France', 2.75, '\xa0',
        'Togo'],
       ['A. Morin', 'Atsane', 1676, 2015, '70%', 'France', 3.0, '\xa0',
        'Togo'],
       ['A. Morin', 'Akata', 1680, 2015, '70%', 'France', 3.5, '\xa0',
        'Togo'],
       ['A. Morin', 'Quilla', 1704, 2015, '70%', 'France', 3.5, '\xa0',
        'Peru'],
       ['A. Morin', 'Carenero', 1315, 2014, '70%', 'France', 2.75,
        'Criollo', 'Venezuela'],
       ['A. Morin', 'Cuba', 1315, 2014, '70%', 'France', 3.5, '\xa0',
        'Cuba'],
       ['A. Morin', 'Sur del Lago', 1315, 2014, '70%', 'France', 3.5,
        'Criollo', 'Venezuela'],
       ['A. Morin', 'Puerto Cabello', 1319, 2014, '70%', 'France', 3.75,
        'Criollo', 'Venezuela'],
       ['A. Morin', 'Pablino', 1319, 2014, '70%', 'France', 4.0, '\xa0',
        'Peru']], dtype=object)


## Create a variable and assign it the printed values, copied from the output above
### This assignment can also be copied and pasted into a Stack Overflow question

In [7]:
# Row values copied from the pretty-printed array output, keeping only the nested
# lists (the surrounding array(...) call and the dtype=object suffix are dropped).
# '\xa0' is a non-breaking space that appears in the data; it is kept verbatim.
sof_values = [
    ['A. Morin', 'Agua Grande', 1876, 2016, '63%', 'France', 3.75, '\xa0', 'Sao Tome'],
    ['A. Morin', 'Kpime', 1676, 2015, '70%', 'France', 2.75, '\xa0', 'Togo'],
    ['A. Morin', 'Atsane', 1676, 2015, '70%', 'France', 3.0, '\xa0', 'Togo'],
    ['A. Morin', 'Akata', 1680, 2015, '70%', 'France', 3.5, '\xa0', 'Togo'],
    ['A. Morin', 'Quilla', 1704, 2015, '70%', 'France', 3.5, '\xa0', 'Peru'],
    ['A. Morin', 'Carenero', 1315, 2014, '70%', 'France', 2.75, 'Criollo', 'Venezuela'],
    ['A. Morin', 'Cuba', 1315, 2014, '70%', 'France', 3.5, '\xa0', 'Cuba'],
    ['A. Morin', 'Sur del Lago', 1315, 2014, '70%', 'France', 3.5, 'Criollo', 'Venezuela'],
    ['A. Morin', 'Puerto Cabello', 1319, 2014, '70%', 'France', 3.75, 'Criollo', 'Venezuela'],
    ['A. Morin', 'Pablino', 1319, 2014, '70%', 'France', 4.0, '\xa0', 'Peru'],
]

## Using sof_values and sof_headers, the Stack Overflow community can easily reproduce your DataFrame and more easily answer your question

In [8]:
# Rebuild a DataFrame from the pasted literals: rows from sof_values, column labels from sof_headers.
sof_df = pd.DataFrame(data=sof_values, columns=sof_headers)

In [9]:
# Display the DataFrame rebuilt from sof_values and sof_headers.
sof_df

Unnamed: 0,Company (Maker-if known),Specific Bean Origin or Bar Name,REF,Review Date,Cocoa Percent,Company Location,Rating,Bean Type,Broad Bean Origin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru
5,A. Morin,Carenero,1315,2014,70%,France,2.75,Criollo,Venezuela
6,A. Morin,Cuba,1315,2014,70%,France,3.5,,Cuba
7,A. Morin,Sur del Lago,1315,2014,70%,France,3.5,Criollo,Venezuela
8,A. Morin,Puerto Cabello,1319,2014,70%,France,3.75,Criollo,Venezuela
9,A. Morin,Pablino,1319,2014,70%,France,4.0,,Peru


## Using to_clipboard and read_clipboard

In [10]:
# Copy the first ten rows to the system clipboard as comma-separated text, leaving out the index.
df.iloc[:10].to_clipboard(index=False, sep=',')

## Output of `to_clipboard` (the text placed on the clipboard; shown for reference, not runnable code)

In [None]:
"Company 
(Maker-if known)","Specific Bean Origin
or Bar Name",REF,"Review
Date","Cocoa
Percent","Company
Location",Rating,"Bean
Type","Broad Bean
Origin"
A. Morin,Agua Grande,1876,2016,63%,France,3.75, ,Sao Tome
A. Morin,Kpime,1676,2015,70%,France,2.75, ,Togo
A. Morin,Atsane,1676,2015,70%,France,3.0, ,Togo
A. Morin,Akata,1680,2015,70%,France,3.5, ,Togo
A. Morin,Quilla,1704,2015,70%,France,3.5, ,Peru
A. Morin,Carenero,1315,2014,70%,France,2.75,Criollo,Venezuela
A. Morin,Cuba,1315,2014,70%,France,3.5, ,Cuba
A. Morin,Sur del Lago,1315,2014,70%,France,3.5,Criollo,Venezuela
A. Morin,Puerto Cabello,1319,2014,70%,France,3.75,Criollo,Venezuela
A. Morin,Pablino,1319,2014,70%,France,4.0, ,Peru

## After executing `to_clipboard`, run `pd.read_clipboard`

In [11]:
# Parse the clipboard contents as comma-separated text; the resulting DataFrame is shown as this cell's output.
pd.read_clipboard(sep=',')

Unnamed: 0,Company (Maker-if known),Specific Bean Origin or Bar Name,REF,Review Date,Cocoa Percent,Company Location,Rating,Bean Type,Broad Bean Origin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru
5,A. Morin,Carenero,1315,2014,70%,France,2.75,Criollo,Venezuela
6,A. Morin,Cuba,1315,2014,70%,France,3.5,,Cuba
7,A. Morin,Sur del Lago,1315,2014,70%,France,3.5,Criollo,Venezuela
8,A. Morin,Puerto Cabello,1319,2014,70%,France,3.75,Criollo,Venezuela
9,A. Morin,Pablino,1319,2014,70%,France,4.0,,Peru
