In [1]:
import sys
import pathlib 
import urllib.request
import sys
import rdata
import feather
import pyreadr


In [2]:
def download_a_file_from_url(url: str, destination_path):
    urllib.request.urlretrieve(url, destination_path)

def import_rda_file_by_rdata(import_path, dataframe_name: str):
    parsed = rdata.parser.parse_file(import_path)
    converted = rdata.conversion.convert(parsed)
    return converted[dataframe_name]    

def import_external_file_as_dataframe(public_file_url, data_name, extension, import_method="feather"):

    absolute_dir = pathlib.Path().resolve()
    file_name = data_name + extension
    destination_path = absolute_dir.parent.joinpath("data", "public_data", file_name)
    download_a_file_from_url(public_file_url, destination_path)
    
    if (import_method == "rdata"):
        return import_rda_file_by_rdata(import_path=destination_path, dataframe_name=data_name)

#     this case returns error "ArrowInvalid: Not a Feather V1 or Arrow IPC file" when the file format is not feather
    elif (import_method == "feather"):
        return feather.read_dataframe(destination_path)[data_name]

    elif (import_method == "pyreadr"):
        return pyreadr.read_r(str(destination_path))[data_name]

    else:
        print("not registered import method. returns None")
        sys.exit(0)


In [3]:
public_file_url: str = "https://github.com/google/GeoexperimentsResearch/raw/master/data/geoassignment.rda"
# import_method = "rdata" # failed
# import_method = "feather" # failed
import_method = "pyreadr" # succeeded!

geoassignment_df = import_external_file_as_dataframe(
    public_file_url, 
    data_name="geoassignment", 
    extension=".rda", 
    import_method=import_method)
print(geoassignment_df)

    geo  geo.group
0     1          2
1     2          1
2     3          1
3     4          2
4     5          1
..  ...        ...
95   96          2
96   97          1
97   98          2
98   99          2
99  100          1

[100 rows x 2 columns]


In [4]:
geoassignment_df.head

<bound method NDFrame.head of     geo  geo.group
0     1          2
1     2          1
2     3          1
3     4          2
4     5          1
..  ...        ...
95   96          2
96   97          1
97   98          2
98   99          2
99  100          1

[100 rows x 2 columns]>

In [5]:
public_file_url: str = "https://github.com/google/GeoexperimentsResearch/raw/master/data/salesandcost.rda"
salesandcost_df = import_external_file_as_dataframe(
    public_file_url, 
    data_name="salesandcost", 
    extension=".rda", 
    import_method=import_method)


In [6]:
salesandcost_df.head

<bound method NDFrame.head of             date  geo    sales  cost
0     2015-01-05    1  7227.32   0.0
1     2015-01-05   10  1827.21   0.0
2     2015-01-05  100    23.98   0.0
3     2015-01-05   11  1501.10   0.0
4     2015-01-05   12  1371.61   0.0
...          ...  ...      ...   ...
9220  2015-04-07   95    49.01   0.0
9221  2015-04-07   96    49.01   0.0
9222  2015-04-07   97    35.01   0.0
9223  2015-04-07   98    49.01   0.0
9224  2015-04-07   99    35.01   0.0

[9225 rows x 4 columns]>