In [124]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
import os
import re
from scipy import stats as st
from statsmodels.stats.proportion import proportions_ztest as ztest

In [45]:
plt.style.use('fivethirtyeight')
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.min_rows', 20)
pd.set_option('display.max_columns', None)
pd.set_option('mode.chained_assignment', None)

# Test case #1

Assuming all files are identical in their structure being the segments of a larger chunk, we should be just fine with a loop concatenating the small portions back into one dataset. At four million rows we're unlikely to lack memory, however in such a case it's fairly easy to build in some data type optimization for each file.  
To make it easier we can put all the csv files into one folder created specifically for this purpose, and afterwards save the single file in case we need it later.

In [17]:
df = pd.DataFrame()

for file in os.scandir('csvs/'):
    print(f'Reading file: {file}')
    tmp = pd.read_csv(file)
    df = pd.concat([df, tmp])
    
df.info()

df.to_csv('the_whole_dataset.csv')

Reading file: <DirEntry 'test_csv - Copy (2).csv'>
Reading file: <DirEntry 'test_csv - Copy (3).csv'>
Reading file: <DirEntry 'test_csv - Copy (4).csv'>
Reading file: <DirEntry 'test_csv - Copy (5).csv'>
Reading file: <DirEntry 'test_csv - Copy (6).csv'>
Reading file: <DirEntry 'test_csv - Copy (7).csv'>
Reading file: <DirEntry 'test_csv - Copy (8).csv'>
Reading file: <DirEntry 'test_csv - Copy (9).csv'>
Reading file: <DirEntry 'test_csv - Copy.csv'>
Reading file: <DirEntry 'test_csv.csv'>
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5761020 entries, 0 to 576101
Data columns (total 6 columns):
 #   Column             Dtype 
---  ------             ----- 
 0   HomeFname          object
 1   HomeLname          object
 2   Account            int64 
 3   HomeUserID         int64 
 4   HomeEventDatetime  object
 5   EventType          object
dtypes: int64(2), object(4)
memory usage: 307.7+ MB


# Test Case #2

## Results, before you read the rest

The first stab at this research was based on the assumption that past members contain the same valuable indicators as the ten users who did pay for a membership or a ticket.  

I compared the scarce numerical data and quantified several data points to look for measurable differences between the groups. The bottom line is I found next to nothing, and even the noticable difference in the number of subscribers which makes sense on paper, turns out to not be statistically significant.  

If you're still curious about my train of thought feel free to continue reading.

## First look

In [3]:
df = pd.read_excel('linkedin.xlsx')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 348 entries, 0 to 347
Data columns (total 74 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   linkedinProfile               348 non-null    object        
 1   Result                        348 non-null    object        
 2   Other                         191 non-null    object        
 3   email                         216 non-null    object        
 4   description                   278 non-null    object        
 5   headline                      348 non-null    object        
 6   location                      348 non-null    object        
 7   firstName                     348 non-null    object        
 8   lastName                      348 non-null    object        
 9   fullName                      348 non-null    object        
 10  subscribers                   347 non-null    float64       
 11  userId                        28

In [4]:
df.head()

Unnamed: 0,linkedinProfile,Result,Other,email,description,headline,location,firstName,lastName,fullName,subscribers,userId,mutualConnectionsText,mailFromDropcontact,company,companyUrl,jobTitle,jobDescription,jobLocation,jobDateRange,company2,companyUrl2,jobTitle2,jobDescription2,jobDateRange2,school,schoolUrl,schoolDegree,schoolDegreeSpec,schoolDateRange,school2,schoolUrl2,schoolDegree2,schoolDegreeSpec2,schoolDateRange2,qualificationFromDropContact,civilityFromDropContact,phoneNumberFromDropContact,websiteFromDropContact,twitter,website,companyWebsite,allSkills,skill1,endorsement1,skill2,endorsement2,skill3,endorsement3,skill4,endorsement4,skill5,endorsement5,skill6,endorsement6,baseUrl,profileId,timestamp,jobLocation2,connectionsCount,mail,birthday,schoolDescription2,naf5CodeFromDropContact,naf5DesFromDropContact,sirenFromDropContact,siretFromDropContact,siretAddressFromDropContact,siretZipFromDropContact,vatFromDropContact,schoolDescription,nbEmployeesFromDropContact,Unnamed: 72,Unnamed: 73
0,https://www.linkedin.com/in/mark-kohoot-96a08b1,5. Purchased Membership,Hunter,mk@aeroscena.com,Mark founded Aeroscena in 2010 with the missio...,Bio-pharmaceutical / functional fragrance,"Cleveland, Ohio, United States",Mark,Kohoot,Mark Kohoot,1704.0,3172258.0,1 mutual connection: Eric Sullivan,mk@aeroscena.com,Aeroscena,https://www.linkedin.com/company/aeroscena/,CEO,Aeroscena develops and markets fragrances from...,"Cleveland/Akron, Ohio Area",Sep 2010 â€“ Present,Cleveland Clinic Wellness Institute,https://www.linkedin.com/company/cleveland-cli...,Advisor to the Chairman,Supporting the Chairman of the Wellness Instit...,2009 â€“ 2010,Audencia Nantes Ecole de Management,https://www.linkedin.com/school/19998/?legacyS...,master,marketing,1984 â€“ 1985,The Ohio State University,https://www.linkedin.com/school/19133/?legacyS...,BSBA,International Business,1979 â€“ 1983,nominative@pro,Mr,+1 800-671-1890,www.aeroscena.com,ShopAscents,stumbleupon.com/content/1LSMVB,https://aeroscena.com,"Strategic Planning, Leadership, Marketing Stra...",Strategic Planning,31,Leadership,26,Marketing Strategy,24,Strategy,21,New Business Development,18,Business Strategy,15,https://www.linkedin.com/in/mark-kohoot-96a08b1,mark-kohoot-96a08b1,2022-01-03T20:00:58.586Z,,,,NaT,,,,,,,,,,,,
1,https://www.linkedin.com/in/jonbowerman,5. Purchased Membership,Hunter,jon@kidsdraw.org,A creative problem solver who can rapidly desi...,Co Founder and CTO - Product Development and M...,"Farmington, Michigan, United States",Jon,Bowerman,Jon Bowerman,1745.0,14910006.0,1 mutual connection: Eric Sullivan,jon@kidsdraw.org,Kids Draw Bob,https://www.linkedin.com/company/kidsdrawbob/,President and Illustrator,The energy in a room of children who've found ...,"Bloomfield Hills, MI",Jul 2019 â€“ Present,"High Level Marketing, LLC",https://www.linkedin.com/company/high-level-ma...,Co Founder and CTO - VP of Product Development,As one of the fastest growing online marketing...,Nov 2009 â€“ Present,University of Michigan,https://www.linkedin.com/school/18633/?legacyS...,Bachelor of Science (B.S.),Architecture,1999 â€“ 2004,Walled Lake Central High School,https://www.linkedin.com/school/3204390/?legac...,High School Diploma,,1995 â€“ 1999,nominative@pro,Mr,,www.kidsdraw.org,,,http://www.kidsdraw.org/donate,"Web Design, Marketing, Business Strategy, SEO,...",Web Design,99,Marketing,66,Business Strategy,11,SEO,47,Web Development,40,Online Marketing,32,https://www.linkedin.com/in/jonbowerman,jonbowerman,2022-01-03T20:01:43.218Z,"West Bloomfield, MI",,,NaT,,,,,,,,,,,,
2,https://www.linkedin.com/in/parkeralynch,5. Purchased Membership,Hunter,Parker@HedgeHogHealth.com,,CEO at HedgeHog Health | 2020-2021 Global Educ...,"Royal Oak, Michigan, United States",Parker,Lynch,Parker Lynch,2549.0,22364061.0,,parker@hedgehoghealth.com,HedgeHog Health,https://www.linkedin.com/company/hedgehoghealth/,Chief Executive Officer,Revolutionizing education technology and welln...,,Nov 2019 â€“ Present,Born and Raised Detroit Foundation,https://www.linkedin.com/company/born-and-rais...,Executive Director,â€¢Organize events in cities from coast to coa...,Jan 2016 â€“ Present,New York University,https://www.linkedin.com/school/18993/?legacyS...,Dual MA,"Childhood Education, Special Education",2011 â€“ 2013,Indiana University Bloomington,https://www.linkedin.com/school/18342/?legacyS...,Bachelor's degree,"English Language and Literature, General",2002 â€“ 2006,nominative@pro,Mr,+248 6 577 050,www.hedgehoghealth.com,,bornandraiseddetroit.org/,http://www.hedgehoghealth.com,"Editing, Blogging, Social Media, Event Plannin...",Editing,39,Blogging,31,Social Media,25,Event Planning,14,SEO,11,Curriculum Development,8,https://www.linkedin.com/in/parkeralynch,parkeralynch,2022-01-03T20:02:59.669Z,Greater Detroit Area,500.0,Parker@HedgeHogHealth.com,2023-05-15,,,,,,,,,,,,
3,https://www.linkedin.com/in/shacharschiff,5. Purchased Membership,Hunter,shachar@badtesting.com,I'm the founder and principal consultant at Ba...,"I believe that high-quality, powerful software...",Dallas-Fort Worth Metroplex,Shachar,Schiff,Shachar Schiff,1696.0,35484653.0,2 mutual connections: Andrew Storz and Eric Su...,shachar@badtesting.com,BadTestingÂ® â€” Better Software. On Time.,https://www.linkedin.com/company/badtesting/,Founder and Principal,"At BadTesting, we exist because of a simple be...","Austin, Texas, United States",Mar 2012 â€“ Present,The Webby Awards,https://www.linkedin.com/company/the-webby-awa...,Associate Judge,The Webby Awards is the leading international ...,Feb 2015 â€“ Present,DeVry University,https://www.linkedin.com/school/21203/?legacyS...,Bachelor of Science - BS,Electrical and Electronics Engineering,1997 â€“ 2000,,,,,,nominative@pro,Mr,+33 7 50 34 96 76,www.badtesting.com,,badtesting.com,http://www.badtesting.com,"Software Quality Assurance, Quality Assurance,...",Software Quality Assurance,71,Quality Assurance,69,Testing,66,System Testing,44,Usability Testing,42,Start-ups,35,https://www.linkedin.com/in/shacharschiff,shacharschiff,2022-01-03T20:03:52.557Z,,,,NaT,,,,,,,,,,,,
4,https://www.linkedin.com/in/gary-lacourt-811b1874,4.Purchased Event,Hunter,gary.lacourt@forevercompanies.com,Serial entrepreneur and business founder. Main...,"Founder and CEO, Forever Companies","Milwaukee, Wisconsin, United States",Gary,LaCourt,Gary LaCourt,710.0,265539469.0,,gary.lacourt@forevercompanies.com,Forever Companies,https://www.linkedin.com/company/forever-compa...,Founder & CEO,Founder and CEO of Forever Companies,Greater Milwaukee Area,Aug 2004 â€“ Present,Milwaukee Business Journal Leadership Trust,https://www.linkedin.com/company/milwaukee-bus...,Charter Member,,Jun 2019 â€“ Present,Northwestern University,https://www.linkedin.com/school/18290/?legacyS...,,,,University of Wisconsin-Milwaukee,https://www.linkedin.com/school/19691/?legacyS...,,,,nominative@pro,Mr,+1 800-509-4990,www.forevercompanies.com,,forevercompanies.com/,http://forevercompanies.com,"Marketing Strategy, Jewelry Design, E-commerce...",Marketing Strategy,20,Jewelry Design,18,E-commerce,17,Online Advertising,15,Entrepreneurship,13,Sales,10,https://www.linkedin.com/in/gary-lacourt-811b1874,gary-lacourt-811b1874,2022-01-03T20:04:29.760Z,,,,NaT,,,,,,,,,,,,


In [5]:
df.sample(5)

Unnamed: 0,linkedinProfile,Result,Other,email,description,headline,location,firstName,lastName,fullName,subscribers,userId,mutualConnectionsText,mailFromDropcontact,company,companyUrl,jobTitle,jobDescription,jobLocation,jobDateRange,company2,companyUrl2,jobTitle2,jobDescription2,jobDateRange2,school,schoolUrl,schoolDegree,schoolDegreeSpec,schoolDateRange,school2,schoolUrl2,schoolDegree2,schoolDegreeSpec2,schoolDateRange2,qualificationFromDropContact,civilityFromDropContact,phoneNumberFromDropContact,websiteFromDropContact,twitter,website,companyWebsite,allSkills,skill1,endorsement1,skill2,endorsement2,skill3,endorsement3,skill4,endorsement4,skill5,endorsement5,skill6,endorsement6,baseUrl,profileId,timestamp,jobLocation2,connectionsCount,mail,birthday,schoolDescription2,naf5CodeFromDropContact,naf5DesFromDropContact,sirenFromDropContact,siretFromDropContact,siretAddressFromDropContact,siretZipFromDropContact,vatFromDropContact,schoolDescription,nbEmployeesFromDropContact,Unnamed: 72,Unnamed: 73
156,https://www.linkedin.com/in/alexpurtell,2. Sent Personal f/u,Hunter,alex@horizontwolabs.com,,Partner & Growth Lead at Horizon Two Labs,"Columbus, Ohio, United States",Alex,Purtell,Alex Purtell,3304.0,189407345.0,,alex@horizontwolabs.com,Horizon Two Labs,https://www.linkedin.com/company/horizontwolabs/,Partner + Accelerator Director,At Horizon Two Labs we define the adjacent pos...,"Columbus, Ohio Area",Aug 2018 â€“ Present,Rekovo,https://www.linkedin.com/company/rekovo/,Co-founder & CEO,Rekovo is SAAS platform that uses real-time do...,Jan 2015 â€“ Present,The Ohio State University,https://www.linkedin.com/school/19133/?legacyS...,,Entrepreneurial and Small Business Operations,,,,,,,nominative@pro,Mr,,www.horizontwolabs.com,alexpurtell,,http://horizontwolabs.com,"Entrepreneurship, Start-ups, Marketing, Online...",Entrepreneurship,46.0,Start-ups,34.0,Marketing,29.0,Online Advertising,15.0,Business Development,9.0,Product Development,7.0,https://www.linkedin.com/in/alexpurtell,alexpurtell,2022-01-03T22:37:31.540Z,"Columbus, Ohio Area",,,NaT,,,,,,,,,,,,
337,https://www.linkedin.com/in/levi-baker-a54610109/,2021 Member,,,,Builder at Creative elements Inc,"Heber City, Utah, United States",Levi,Baker,Levi Baker,34.0,458370784.0,,,Creative elements Inc,,Builder,,,,,,,,,,,,,,,,,,,,Mr,,www.thecreativeelements.com,,,,,,,,,,,,,,,,,https://www.linkedin.com/in/levi-baker-a54610109,levi-baker-a54610109,2022-01-19T14:17:58.548Z,,,,NaT,,,,,,,,,,,,
29,https://www.linkedin.com/in/johannesariens,3. Approved,Hollis,ja@routeline.com,If anything Iâ€™m up to sounds interesting or ...,CEO at Route Line - Create & Execute,Greater Seattle Area,Johannes,Ariens,Johannes Ariens,1239.0,137417395.0,,ja@routeline.com,Route Line,https://www.linkedin.com/company/route-line/,CEO,Route Line is an end-to-end adventure company ...,"Seattle, Washington, United States",Jul 2021 â€“ Present,Commodity Project,https://www.linkedin.com/company/commodityproj...,Principal,The Commodity Project is a platform brand and ...,Jan 2021 â€“ Present,Harvard Business School,https://www.linkedin.com/school/18484/?legacyS...,,Alternative Investments,2021 â€“ 2021,University of Washington,https://www.linkedin.com/school/19657/?legacyS...,Professional Certificate Program,Commercial Real Estate Development,2015 â€“ 2016,nominative@pro,Mr,,www.routeline.com,RadifyDev,,http://www.routeline.com,"Construction, Submittals, Construction Managem...",Construction,36.0,Submittals,23.0,Construction Management,18.0,LEED,17.0,Project Management,15.0,LEED AP,13.0,https://www.linkedin.com/in/johannesariens,johannesariens,2022-01-03T20:28:57.663Z,"Seattle, WA",500.0,,2023-06-11,,,,,,,,,,,,
281,https://www.linkedin.com/in/divatommei/,2021 Member,,diva.tommei@eitdigital.eu,,Head of EIT Digital Accelerator,"Rome, Latium, Italy",Diva,Tommei,Diva Tommei,5601.0,73384298.0,,,EIT Digital,https://www.linkedin.com/company/eit-digital/,Head of EIT Digital Accelerator,,"Rome, Latium, Italy",Jan 2022 â€“ Present,SECO spa,https://www.linkedin.com/company/seco-spa/,Non Executive Director,SECO is a publicly traded company leader in em...,May 2021 â€“ Present,University of Cambridge,https://www.linkedin.com/school/12691/?legacyS...,PhD,Molecular and Computational Biology,2008 â€“ 2013,"University of Rome ""La Sapienza""",,Master of Science (MS),Genomic Biotechnology,2005 â€“ 2007,,Mrs,,www.eitdigital.eu,DivaSolenica,,http://www.eitdigital.eu,"Entrepreneurship, Start-ups, Research, Bioinfo...",Entrepreneurship,54.0,Start-ups,53.0,Research,37.0,Bioinformatics,23.0,Biotechnology,20.0,Project Management,20.0,https://www.linkedin.com/in/divatommei,divatommei,2022-01-19T13:33:40.633Z,"Arezzo, Tuscany, Italy",,,2023-04-23,"Summa Cum Laude, GPA 3.95",,,,,,,,"Thesis: ""Transcriptional Characterisation of G...",2023-01-02 00:00:00,,diva.tommei@eitdigital.eu
222,https://www.linkedin.com/in/sean-holmes-703a4538/,2021 Member,,sean@nectarsunglasses.com,Experienced Co-Founder with a demonstrated his...,Co-Founder - Nectar Sunglasses,"Charleston, South Carolina, United States",Sean,Holmes,Sean Holmes,2466.0,,,sean@nectarsunglasses.com,Nectar Sunglasses,https://www.linkedin.com/company/33296600/,Co-founder Nectar Sunglasses,,,Oct 2011 - Present Â· 10 yrs 4 mos,Azzurro,https://www.linkedin.com/search/results/all/?k...,Server,,Nov 2011 - Feb 2012 Â· 4 mos,Virginia Commonwealth University,https://www.linkedin.com/company/166810/,,,2006 - 2010,Virginia Commonwealth University,https://www.linkedin.com/company/166810/,Urban planning & Marketing,,2006 - 2010,nominative@pro,,+84 38066133,www.nectarsunglasses.com,,nectarsunglasses.com,http://www.nectarsunglasses.com,"Customer Service, Social Media Marketing, Soci...",Customer Service,31.0,Social Media Marketing,29.0,Social Media,29.0,Marketing,21.0,Social Networking,19.0,Sales,18.0,https://www.linkedin.com/in/sean-holmes-703a4538,sean-holmes-703a4538,2022-01-19T13:49:37.176Z,"Charleston, South Carolina",,,NaT,,,,,,,,,,,,


In [6]:
df.duplicated().sum()

0

We have 348 LinkedIn profiles, with no duplicates at least at first glance. A lot of missing data with no way of filling in the gaps, so we'll take a closer look on how it's distributed in our target groups, as well as look for potential categorical data points.

## Examining data

In [7]:
df.Result.value_counts()

2021 Member                157
2. Sent Personal f/u       127
3. Approved                 28
Out of Sequence             26
4.Purchased Event            6
5. Purchased Membership      4
Name: Result, dtype: int64

So if we count both ideal and second best outcomes our conversion rate is ~3% which sounds normal for an outreach, however it leaves us with very little data; so we'll have to examine past members too as they have at some point paid for a membership - why they are no longer doing so is another question (assuming they have not indeed renewed their membership).

In [12]:
won = df[(df['Result']=='5. Purchased Membership') | (df['Result']=='4.Purchased Event')]
won

Unnamed: 0,linkedinProfile,Result,Other,email,description,headline,location,firstName,lastName,fullName,subscribers,userId,mutualConnectionsText,mailFromDropcontact,company,companyUrl,jobTitle,jobDescription,jobLocation,jobDateRange,company2,companyUrl2,jobTitle2,jobDescription2,jobDateRange2,school,schoolUrl,schoolDegree,schoolDegreeSpec,schoolDateRange,school2,schoolUrl2,schoolDegree2,schoolDegreeSpec2,schoolDateRange2,qualificationFromDropContact,civilityFromDropContact,phoneNumberFromDropContact,websiteFromDropContact,twitter,website,companyWebsite,allSkills,skill1,endorsement1,skill2,endorsement2,skill3,endorsement3,skill4,endorsement4,skill5,endorsement5,skill6,endorsement6,baseUrl,profileId,timestamp,jobLocation2,connectionsCount,mail,birthday,schoolDescription2,naf5CodeFromDropContact,naf5DesFromDropContact,sirenFromDropContact,siretFromDropContact,siretAddressFromDropContact,siretZipFromDropContact,vatFromDropContact,schoolDescription,nbEmployeesFromDropContact,Unnamed: 72,Unnamed: 73
0,https://www.linkedin.com/in/mark-kohoot-96a08b1,5. Purchased Membership,Hunter,mk@aeroscena.com,Mark founded Aeroscena in 2010 with the missio...,Bio-pharmaceutical / functional fragrance,"Cleveland, Ohio, United States",Mark,Kohoot,Mark Kohoot,1704.0,3172258.0,1 mutual connection: Eric Sullivan,mk@aeroscena.com,Aeroscena,https://www.linkedin.com/company/aeroscena/,CEO,Aeroscena develops and markets fragrances from...,"Cleveland/Akron, Ohio Area",Sep 2010 â€“ Present,Cleveland Clinic Wellness Institute,https://www.linkedin.com/company/cleveland-cli...,Advisor to the Chairman,Supporting the Chairman of the Wellness Instit...,2009 â€“ 2010,Audencia Nantes Ecole de Management,https://www.linkedin.com/school/19998/?legacyS...,master,marketing,1984 â€“ 1985,The Ohio State University,https://www.linkedin.com/school/19133/?legacyS...,BSBA,International Business,1979 â€“ 1983,nominative@pro,Mr,+1 800-671-1890,www.aeroscena.com,ShopAscents,stumbleupon.com/content/1LSMVB,https://aeroscena.com,"Strategic Planning, Leadership, Marketing Stra...",Strategic Planning,31,Leadership,26,Marketing Strategy,24,Strategy,21,New Business Development,18,Business Strategy,15,https://www.linkedin.com/in/mark-kohoot-96a08b1,mark-kohoot-96a08b1,2022-01-03T20:00:58.586Z,,,,NaT,,,,,,,,,,,,
1,https://www.linkedin.com/in/jonbowerman,5. Purchased Membership,Hunter,jon@kidsdraw.org,A creative problem solver who can rapidly desi...,Co Founder and CTO - Product Development and M...,"Farmington, Michigan, United States",Jon,Bowerman,Jon Bowerman,1745.0,14910006.0,1 mutual connection: Eric Sullivan,jon@kidsdraw.org,Kids Draw Bob,https://www.linkedin.com/company/kidsdrawbob/,President and Illustrator,The energy in a room of children who've found ...,"Bloomfield Hills, MI",Jul 2019 â€“ Present,"High Level Marketing, LLC",https://www.linkedin.com/company/high-level-ma...,Co Founder and CTO - VP of Product Development,As one of the fastest growing online marketing...,Nov 2009 â€“ Present,University of Michigan,https://www.linkedin.com/school/18633/?legacyS...,Bachelor of Science (B.S.),Architecture,1999 â€“ 2004,Walled Lake Central High School,https://www.linkedin.com/school/3204390/?legac...,High School Diploma,,1995 â€“ 1999,nominative@pro,Mr,,www.kidsdraw.org,,,http://www.kidsdraw.org/donate,"Web Design, Marketing, Business Strategy, SEO,...",Web Design,99,Marketing,66,Business Strategy,11,SEO,47,Web Development,40,Online Marketing,32,https://www.linkedin.com/in/jonbowerman,jonbowerman,2022-01-03T20:01:43.218Z,"West Bloomfield, MI",,,NaT,,,,,,,,,,,,
2,https://www.linkedin.com/in/parkeralynch,5. Purchased Membership,Hunter,Parker@HedgeHogHealth.com,,CEO at HedgeHog Health | 2020-2021 Global Educ...,"Royal Oak, Michigan, United States",Parker,Lynch,Parker Lynch,2549.0,22364061.0,,parker@hedgehoghealth.com,HedgeHog Health,https://www.linkedin.com/company/hedgehoghealth/,Chief Executive Officer,Revolutionizing education technology and welln...,,Nov 2019 â€“ Present,Born and Raised Detroit Foundation,https://www.linkedin.com/company/born-and-rais...,Executive Director,â€¢Organize events in cities from coast to coa...,Jan 2016 â€“ Present,New York University,https://www.linkedin.com/school/18993/?legacyS...,Dual MA,"Childhood Education, Special Education",2011 â€“ 2013,Indiana University Bloomington,https://www.linkedin.com/school/18342/?legacyS...,Bachelor's degree,"English Language and Literature, General",2002 â€“ 2006,nominative@pro,Mr,+248 6 577 050,www.hedgehoghealth.com,,bornandraiseddetroit.org/,http://www.hedgehoghealth.com,"Editing, Blogging, Social Media, Event Plannin...",Editing,39,Blogging,31,Social Media,25,Event Planning,14,SEO,11,Curriculum Development,8,https://www.linkedin.com/in/parkeralynch,parkeralynch,2022-01-03T20:02:59.669Z,Greater Detroit Area,500.0,Parker@HedgeHogHealth.com,2023-05-15,,,,,,,,,,,,
3,https://www.linkedin.com/in/shacharschiff,5. Purchased Membership,Hunter,shachar@badtesting.com,I'm the founder and principal consultant at Ba...,"I believe that high-quality, powerful software...",Dallas-Fort Worth Metroplex,Shachar,Schiff,Shachar Schiff,1696.0,35484653.0,2 mutual connections: Andrew Storz and Eric Su...,shachar@badtesting.com,BadTestingÂ® â€” Better Software. On Time.,https://www.linkedin.com/company/badtesting/,Founder and Principal,"At BadTesting, we exist because of a simple be...","Austin, Texas, United States",Mar 2012 â€“ Present,The Webby Awards,https://www.linkedin.com/company/the-webby-awa...,Associate Judge,The Webby Awards is the leading international ...,Feb 2015 â€“ Present,DeVry University,https://www.linkedin.com/school/21203/?legacyS...,Bachelor of Science - BS,Electrical and Electronics Engineering,1997 â€“ 2000,,,,,,nominative@pro,Mr,+33 7 50 34 96 76,www.badtesting.com,,badtesting.com,http://www.badtesting.com,"Software Quality Assurance, Quality Assurance,...",Software Quality Assurance,71,Quality Assurance,69,Testing,66,System Testing,44,Usability Testing,42,Start-ups,35,https://www.linkedin.com/in/shacharschiff,shacharschiff,2022-01-03T20:03:52.557Z,,,,NaT,,,,,,,,,,,,
4,https://www.linkedin.com/in/gary-lacourt-811b1874,4.Purchased Event,Hunter,gary.lacourt@forevercompanies.com,Serial entrepreneur and business founder. Main...,"Founder and CEO, Forever Companies","Milwaukee, Wisconsin, United States",Gary,LaCourt,Gary LaCourt,710.0,265539469.0,,gary.lacourt@forevercompanies.com,Forever Companies,https://www.linkedin.com/company/forever-compa...,Founder & CEO,Founder and CEO of Forever Companies,Greater Milwaukee Area,Aug 2004 â€“ Present,Milwaukee Business Journal Leadership Trust,https://www.linkedin.com/company/milwaukee-bus...,Charter Member,,Jun 2019 â€“ Present,Northwestern University,https://www.linkedin.com/school/18290/?legacyS...,,,,University of Wisconsin-Milwaukee,https://www.linkedin.com/school/19691/?legacyS...,,,,nominative@pro,Mr,+1 800-509-4990,www.forevercompanies.com,,forevercompanies.com/,http://forevercompanies.com,"Marketing Strategy, Jewelry Design, E-commerce...",Marketing Strategy,20,Jewelry Design,18,E-commerce,17,Online Advertising,15,Entrepreneurship,13,Sales,10,https://www.linkedin.com/in/gary-lacourt-811b1874,gary-lacourt-811b1874,2022-01-03T20:04:29.760Z,,,,NaT,,,,,,,,,,,,
5,https://www.linkedin.com/in/lyle-stoflet-38a07ab8,4.Purchased Event,Hunter,stoflet@stratusindustries.com,Whatâ€™s to love about our design products?\n\...,Managing Partner at Stratus Industries / Gear ...,"Milwaukee, Wisconsin, United States",Lyle,Stoflet,Lyle Stoflet,343.0,418200730.0,,stoflet@stratusindustries.com,Stratus Industries / Gear Grove / Containers Up,,Managing Partner,Guiding our business directions and goals with...,"Milwaukee, Wisconsin, United States",May 2012 â€“ Present,Gear Grove,,Managing Partner,Creating my clients visions. Take pride in cre...,Aug 2012 â€“ Present,Waukesha County Technical College,https://www.linkedin.com/school/21291/?legacyS...,,Engineering,1999 â€“ 2001,,,,,,catch-all@pro,Mr,,www.stratusindustries.com,,,,"Upcycling, Woodworking, Metal Fabrication, Man...",Upcycling,2,Woodworking,4,Metal Fabrication,3,Manufacturing,3,Sales Management,2,Furniture,3,https://www.linkedin.com/in/lyle-stoflet-38a07ab8,lyle-stoflet-38a07ab8,2022-01-03T20:05:35.092Z,Greater Milwaukee Area,,,NaT,,,,,,,,,,,,
6,https://www.linkedin.com/in/whiteandrewdc,4.Purchased Event,Hunter,,Helping families and individuals reach their G...,Founder at Align & Co.,"Holland, Michigan, United States",Andrew,"White, D.C.","Andrew White, D.C.",799.0,167264628.0,,,Align & Co.,,Chief Executive Officer,Align & Co is an innovative health care brand ...,"Holland, Michigan, United States",Dec 2020 â€“ Present,AlignWell Chiropractic,,Entrepreneur,CEO at AlignWell Chiropractic. AlignWell Chiro...,Mar 2017 â€“ Present,Palmer College of Chiropractic,,Doctor of Chiropractic,Medicine,2013 â€“ 2017,Albion College,,Bachelor of Science (BS),"Biology, General",2009 â€“ 2013,,Mr,,www.aligntech.com,,,,"Microsoft Office, Public Speaking, PowerPoint,...",Microsoft Office,14,Public Speaking,12,PowerPoint,11,Sales,7,Social Media,5,Healing,1,https://www.linkedin.com/in/whiteandrewdc,whiteandrewdc,2022-01-03T20:06:27.268Z,"Holland, Michigan",,,2023-09-21,While attending Albion I double majored in bio...,86.23Z,Pratique dentaire,829994987.0,83000000000000.0,5 Avenue Pasteur,17400.0,FR17829994987,,,,
7,https://www.linkedin.com/in/donniemcfall,4.Purchased Event,Hunter,,,"Owner at McFall Consulting, Co-Founder at The ...","Minneapolis, Minnesota, United States",Donnie,McFall,Donnie McFall,1842.0,21326084.0,,,McFall Consulting Inc.,,Owner,A Service Disabled Veteran Owned Business tha...,"Winchester, Viginia",Jun 2013 â€“ Present,The Light Doctors,,Co-Founder,The Light Doctors are here help you choose the...,Oct 2019 â€“ Present,University of Minnesota - Carlson School of Ma...,https://www.linkedin.com/school/18673/?legacyS...,Master of Business Administration - MBA,,2020 â€“ 2022,American Public University System,https://www.linkedin.com/school/19704/?legacyS...,Bachelor of Business Administration - BBA,Marketing,2020,,Mr,,www.mcfallconsulting.com,,Mcfallconsulting.com,,"Sales, Leadership, Management, CRM, Sales Oper...",Sales,99+,Leadership,72,Management,53,CRM,38,Sales Operations,37,Photography,35,https://www.linkedin.com/in/donniemcfall,donniemcfall,2022-01-03T20:07:20.084Z,Greater Chicago Area,,,2023-06-24,,,,,,,,,,,,
8,https://www.linkedin.com/in/sykraft,4.Purchased Event,Hollis,sy@sykraft.com,I untie knots and as a brand building professi...,Founder of Fantastic Brands,Los Angeles Metropolitan Area,Sy,Kraft,Sy Kraft,12731.0,14176975.0,,sy@fantastic-brands.com,Fantastic Brands,https://www.linkedin.com/company/sy-kraft/,Founder,Fantastic Brandsâ„¢ is a brand-building speake...,Los Angeles Metropolitan Area,2005 â€“ Present,S.I.N.K. Consulting,https://www.linkedin.com/company/s.i.n.k.-cons...,Founder - Art Curation and Sourcing Specialist,"What started as a brand strategy agency, today...",2012 â€“ Present,California State University-Northridge,https://www.linkedin.com/school/17831/?legacyS...,B.A.,"Journalism, Public Relations",,"University of California, Santa Barbara",https://www.linkedin.com/school/17959/?legacyS...,Journalism,Public Relations,,nominative@pro,Mr,,www.fantastic-brands.com,SINKConsulting,sykraft.com,http://www.fantastic-brands.com,"Integrated Marketing, Digital Strategy, Digita...",Integrated Marketing,99+,Digital Strategy,79,Digital Marketing,77,Advertising,56,Social Media,35,Social Media Marketing,28,https://www.linkedin.com/in/sykraft,sykraft,2022-01-03T20:08:26.445Z,Greater Los Angeles Area,500.0,sy@sykraft.com,2023-05-12,,,,,,,,,Volunteer: Alzheimerâ€™s Association of the L...,,,
9,https://www.linkedin.com/in/alan-dahl-2a6781b,4.Purchased Event,Hollis,,,"Director, Advisor, Investor",Atlanta Metropolitan Area,Alan,Dahl,Alan Dahl,928.0,34755678.0,,,BootsUp Co.,,Co-Founder,,,Jul 2021 â€“ Present,EDG Partners,https://www.linkedin.com/company/edg-partners-...,"Managing Director, Founder",Healthcare Private Equity,2004 â€“ Present,Oklahoma State University,https://www.linkedin.com/school/19170/?legacyS...,MS,Accounting,1983 â€“ 1983,Oklahoma State University,https://www.linkedin.com/school/19170/?legacyS...,BS,Accounting,1979 â€“ 1983,,Mr,+81 59976058,www.bootsupholstery.net,,edgpartners.com,,"Mergers & Acquisitions, Due Diligence, Venture...",Mergers & Acquisitions,42,Due Diligence,25,Venture Capital,21,Private Equity,19,Long-term Care,18,Strategic Planning,17,https://www.linkedin.com/in/alan-dahl-2a6781b,alan-dahl-2a6781b,2022-01-03T20:09:27.543Z,,,,NaT,,,,,,,,,,,,


In [13]:
churned = df[df['Result']=='2021 Member']

In [19]:
churned.sample(5)

Unnamed: 0,linkedinProfile,Result,Other,email,description,headline,location,firstName,lastName,fullName,subscribers,userId,mutualConnectionsText,mailFromDropcontact,company,companyUrl,jobTitle,jobDescription,jobLocation,jobDateRange,company2,companyUrl2,jobTitle2,jobDescription2,jobDateRange2,school,schoolUrl,schoolDegree,schoolDegreeSpec,schoolDateRange,school2,schoolUrl2,schoolDegree2,schoolDegreeSpec2,schoolDateRange2,qualificationFromDropContact,civilityFromDropContact,phoneNumberFromDropContact,websiteFromDropContact,twitter,website,companyWebsite,allSkills,skill1,endorsement1,skill2,endorsement2,skill3,endorsement3,skill4,endorsement4,skill5,endorsement5,skill6,endorsement6,baseUrl,profileId,timestamp,jobLocation2,connectionsCount,mail,birthday,schoolDescription2,naf5CodeFromDropContact,naf5DesFromDropContact,sirenFromDropContact,siretFromDropContact,siretAddressFromDropContact,siretZipFromDropContact,vatFromDropContact,schoolDescription,nbEmployeesFromDropContact,Unnamed: 72,Unnamed: 73
306,https://www.linkedin.com/in/jayfaires/,2021 Member,,,"Faires created TWA (The Wellness Agency), whic...",The Wellness Agency (TWA),"Los Angeles County, California, United States",Jay,Faires,Jay Faires,2564.0,16437770.0,,,The Wellness Agency,https://www.linkedin.com/company/the-wellness-...,Founder,The Wellness Agency (TWA) empowers wellness bu...,"Venice, Ca",Jan 2018 â€“ Present,Lionsgate,https://www.linkedin.com/company/lionsgate/,President of Music,,2005 â€“ 2010,Duke University,https://www.linkedin.com/school/18765/?legacyS...,MBA,,1985 â€“ 1987,Sewanee: The University of the South,,"Bachelor of Arts, with Honors",Psychology,1981 â€“ 1985,,Mr,,www.thewellness.agency,docroc_jay,angel.co/jay-faires,http://www.thewellness.agency/,"Entrepreneurship, Television, Entertainment, S...",Entrepreneurship,0,Television,0,Entertainment,0,Social Media,0,Strategic Partnerships,0,New Media,0,https://www.linkedin.com/in/jayfaires,jayfaires,2022-01-19T13:52:40.395Z,,,,NaT,,,,,,,,,,,,
265,https://www.linkedin.com/in/chriskresser/,2021 Member,,chris@kresserinstitute.com,"Chris Kresser, M.S., L.Ac, is a practitioner o...",Co-Founder and President at California Center ...,"Oakland, California, United States",Chris,Kresser,Chris Kresser,2963.0,68898932.0,,,Kresser Institute,,Founder and CEO,,"Las Vegas, Nevada Area",Oct 2015 â€“ Present,California Center for Functional Medicine,https://www.linkedin.com/company/california-ce...,Co-Founder and President,,Oct 2014 â€“ Present,"Acupuncture & Integrative Medicine College, Be...",,MA,Acupuncture and Oriental Medicine,2007 â€“ 2010,UC Berkeley,,BA,Communications & Social Change,1992 â€“ 1996,,Mr,,www.kresserinstitute.com,chriskresser,thehealthyskeptic.org,,"Nutrition, Holistic Health, Blogging, Wellness...",Nutrition,99+,Holistic Health,99+,Blogging,99+,Wellness,99+,Healing,99+,Wellness Coaching,65,https://www.linkedin.com/in/chriskresser,chriskresser,2022-01-19T13:19:48.978Z,"Berkeley, CA",,,NaT,,,,,,,,,,,,chris@kresserinstitute.com
288,https://www.linkedin.com/in/eric-rieger-a559a010/,2021 Member,,,CRNA - In partnership with gastroenterologist ...,Sales & Market Dev @ Atrantil - Ambassador of ...,Dallas-Fort Worth Metroplex,Eric,Rieger,Eric Rieger,393.0,38280161.0,,,Atrantil,https://www.linkedin.com/company/atrantil/,PR-Sales and Market Development,"Developed by gastroenterologist Ken Brown, MD ...","Dallas, TX",Feb 2015 â€“ Present,Tequila 512 - Willis Importing,,Sales Consultant,Successful launch of Tequila 512 into the Texa...,2012 â€“ Present,Texas Tech University,https://www.linkedin.com/school/19505/?legacyS...,BS,Zoology/Chemistry,1994 â€“ 1998,Texas Wesleyan University,https://www.linkedin.com/school/19510/?legacyS...,MS,Anesthesia,2006 â€“ 2008,,Mr,,www.atrantil.com,,tequila512.com,http://atrantil.com,"Healthcare, Customer Service, Hospitals, Strat...",Healthcare,15,Customer Service,8,Hospitals,7,Strategic Planning,7,Sales,2,Sales Management,1,https://www.linkedin.com/in/eric-rieger-a559a010,eric-rieger-a559a010,2022-01-19T13:38:51.655Z,"Austin, Texas Area",,,NaT,,,,,,,,,,,,
320,https://www.linkedin.com/in/joshualevittnd/,2021 Member,,,Naturopathic physician and medical director at...,Naturopathic Physician,"Hamden, Connecticut, United States",Joshua,Levitt,Joshua Levitt,356.0,36833015.0,,,Whole Health,,"Naturopathic Physician, Medical Director",Natural soultions for common and complex medic...,,Mar 2006 â€“ Present,Natural Health Associates,,Naturopathic Physician,Naturopathic physician,Jan 2003 â€“ Mar 2006,Bastyr University,https://www.linkedin.com/school/19638/?legacyS...,ND,Naturopathic medicine,1997 â€“ 2001,"University of California, Los Angeles",https://www.linkedin.com/school/17950/?legacyS...,BS,Physiological science,1992 â€“ 1996,,Mr,,www.wholehealth.com,JoshuaLevittND,wholehealthct.com,,"Medicine, Naturopathy, Healthcare, Clinical Re...",Medicine,30,Naturopathy,24,Healthcare,19,Clinical Research,15,Nutrition,12,Medical Education,11,https://www.linkedin.com/in/joshualevittnd,joshualevittnd,2022-01-19T14:03:20.759Z,,,,NaT,,,,,,,,,,,,
194,https://www.linkedin.com/in/maxwellhertan/,2021 Member,,,"Lover of entrepreneurship, self-help, philosop...",Founder @ Silvi & Megaphone Marketing (AFR Fas...,"Santa Monica, California, United States",Maxwell,Hertan,Maxwell Hertan,7243.0,,,,My Silvi,https://www.linkedin.com/company/71044201/,Co-Founder,,,Aug 2020 - Present Â· 1 yr 6 mos,Megaphone Marketing,https://www.linkedin.com/company/2928866/,Founder / Director,,Nov 2012 - Present Â· 9 yrs 3 mos,Monash University,https://www.linkedin.com/company/5663/,"Bachelor of Commerce and Economics, Marketing/...",,2006 - 2011,,,,,,,,,www.mysilvi.com,MegaphoneM,megaphonemarketing.com.au,https://www.mysilvi.com,"Social Media Marketing, Social Media, Digital ...",Social Media Marketing,98,Social Media,88,Digital Marketing,78,Marketing,48,Online Advertising,47,Online Marketing,33,https://www.linkedin.com/in/maxwellhertan,maxwellhertan,2022-01-19T13:35:20.408Z,"Melbourne, Victoria, Australia",,,NaT,,,,,,,,,,,,


Seeing a lot of: "head honchos" (founder/owner/C-level etc.), also first-name email addresses. Subscribers, twitter, education and skills seem to vary a lot, as well as age and location. All seem to be within US, suggesting this was a US event.  
Let's take a look at the rest and see if any of that is different.

In [20]:
lost = df[(df.index.isin(won.index)==False) & (df.index.isin(churned.index)==False)]

In [30]:
lost.sample(5)

Unnamed: 0,linkedinProfile,Result,Other,email,description,headline,location,firstName,lastName,fullName,subscribers,userId,mutualConnectionsText,mailFromDropcontact,company,companyUrl,jobTitle,jobDescription,jobLocation,jobDateRange,company2,companyUrl2,jobTitle2,jobDescription2,jobDateRange2,school,schoolUrl,schoolDegree,schoolDegreeSpec,schoolDateRange,school2,schoolUrl2,schoolDegree2,schoolDegreeSpec2,schoolDateRange2,qualificationFromDropContact,civilityFromDropContact,phoneNumberFromDropContact,websiteFromDropContact,twitter,website,companyWebsite,allSkills,skill1,endorsement1,skill2,endorsement2,skill3,endorsement3,skill4,endorsement4,skill5,endorsement5,skill6,endorsement6,baseUrl,profileId,timestamp,jobLocation2,connectionsCount,mail,birthday,schoolDescription2,naf5CodeFromDropContact,naf5DesFromDropContact,sirenFromDropContact,siretFromDropContact,siretAddressFromDropContact,siretZipFromDropContact,vatFromDropContact,schoolDescription,nbEmployeesFromDropContact,Unnamed: 72,Unnamed: 73
11,https://www.linkedin.com/in/anne-zizzo-0075603,3. Approved,Hunter,,"At Zizzo Group, we believe that successful eng...",Founder & CEO at Zizzo Group - Engagement Mark...,"Milwaukee, Wisconsin, United States",Anne,Zizzo,Anne Zizzo,2775.0,10326535.0,,,"Zizzo Ventures, LLC",,Founder,"Investment, leasing, development","Milwaukee, Wisconsin, United States",May 2021 â€“ Present,Zizzo Group - Engagement Marketing,https://www.linkedin.com/company/zizzo-group-m...,Founder & CEO,,Aug 1995 â€“ Present,Marquette University,https://www.linkedin.com/school/19675/?legacyS...,BA,Journalism,1983 â€“ 1987,,,,,,,Mrs,,www.sanford-artedventures.com,,zizzogroup.com,,"Public Relations, Integrated Marketing, Social...",Public Relations,99+,Integrated Marketing,99+,Social Media,97,Advertising,49,Media Relations,39,Strategic Communications,29,https://www.linkedin.com/in/anne-zizzo-0075603,anne-zizzo-0075603,2022-01-03T20:11:36.502Z,"207 N. Milwaukee Street, Milwaukee, WI 53202",500.0,,NaT,,,,,,,,,,,,
15,https://www.linkedin.com/in/samantha-friedman-...,3. Approved,Hunter,,,"Manager, Marketing","Birmingham, Michigan, United States",Samantha,Friedman,Samantha Friedman,1176.0,72697048.0,,,The Mom Project,https://www.linkedin.com/company/the-mom-project/,Seller Marketing Manager @ Etsy,,"Brooklyn, New York, United States",Jul 2021 â€“ Present,Fuse45,https://www.linkedin.com/company/fuse45/,Co Founder & Co Owner,FUSE LIVE is a uniquely sustainable and full-b...,Jun 2015 â€“ Present,University of Michigan - Stephen M. Ross Schoo...,https://www.linkedin.com/school/18634/?legacyS...,Bachelor's of Business Administration,Marketing,2006 â€“ 2010,University of Michigan - Stephen M. Ross Schoo...,https://www.linkedin.com/school/18634/?legacyS...,BBA,Marketing,2006 â€“ 2010,,Mrs,,www.themomproject.com,Fuse45RoyalOak,watch.fuselive.net/,http://themomproject.com,"Marketing Communications, Marketing, Marketing...",Marketing Communications,24,Marketing,16,Marketing Strategy,10,Integrated Marketing,10,Event Management,8,New Business Development,5,https://www.linkedin.com/in/samantha-friedman-...,samantha-friedman-07441a20,2022-01-03T20:14:54.828Z,,,,NaT,2009 Receipient of BBA Entrepreneur of the Yea...,,,,,,,,,,,
129,https://www.linkedin.com/in/schelkidd,2. Sent Personal f/u,Hunter,,Two words can describe Larschelby; Passion & E...,Independent Boisset Collection Ambassador at B...,"Kohler, Wisconsin, United States",Larschelby Kidd,WisconsinWineGuy,Larschelby Kidd WisconsinWineGuy,772.0,83909867.0,,,Boisset Collection,https://www.linkedin.com/company/boisset-colle...,Independent Boisset Collection Ambassador,I am a Independent Boisset Collection Ambassad...,Wisconsin,Jan 2019 â€“ Present,Aras Promotions,https://www.linkedin.com/company/aras-promotions/,Brand Ambassador,"I demonstrate, educate and sample to customers...",Mar 2018 â€“ Present,,,,,,,,,,,,Mr,+1 707-963-6900,www.boissetcollection.com,VinoDiscoveries,my.boissetcollection.com/vinodiscoveries,http://www.boissetcollection.com,"Wine, Restaurants, Event Planning, Wine Tastin...",Wine,54,Restaurants,44,Event Planning,38,Wine Tasting,33,Food,25,Culinary Skills,24,https://www.linkedin.com/in/schelkidd,schelkidd,2022-01-03T22:11:25.186Z,"Sheboygan, Wisconsin Area",500.0,,NaT,,,,,,,,,,,,
87,https://www.linkedin.com/in/gary-peters-568585140,2. Sent Personal f/u,Hunter,,I am a professional writer and former newspape...,"Founder, Final Draft","Sheboygan Falls Town, Wisconsin, United States",Gary,Peters,Gary Peters,487.0,574728416.0,,,Final Draft,,"Founder, Final Draft",,,Apr 2017 â€“ Present,The Valley Sun,,Managing Editor,,Jan 1991 â€“ Mar 1992,University of Wisconsin,,,Journalism,1968 â€“ 1972,,,,,,,Mr,+81 89068930,www.finaldraft.com,scoops1948,,,"Professional writer, Editor, Proofreader, Corp...",Professional writer,3,Editor,1,Proofreader,1,Corporate Communications,0,English,0,Professional writer and former newspaper editor,3,https://www.linkedin.com/in/gary-peters-568585140,gary-peters-568585140,2022-01-03T21:25:30.484Z,"Appleton, WI",,,NaT,,82.99Z,Autres activitÃ©s de soutien aux entreprises n...,818906893.0,81900000000000.0,,,FR42818906893,,UnitÃ©s non employeuses,,
30,https://www.linkedin.com/in/todd-heath-351a1611,3. Approved,Hollis,,My goal is to lead a positive and productive l...,"Publisher + Art Director, Bomb Snow","Great Falls, Montana, United States",Todd,Heath,Todd Heath,1516.0,41337709.0,,,"Freelance, Self-Employed",,"Art Director, Designer, Pixel Pusher",I specialize in layout and catalog design for ...,Worldwide,2006 â€“ Present,Bomb Snow,https://www.linkedin.com/company/bomb-snow-mag...,Owner / Art Director,Bomb Snow is a media platform and quarterly mo...,Apr 2005 â€“ Present,Montana State University-Bozeman,https://www.linkedin.com/school/18753/?legacyS...,Bachelor of Fine Art,Graphic Design and Multimedia Production,2003 â€“ 2005,Ferris State University,https://www.linkedin.com/school/18605/?legacyS...,Associates Degree,Visual Design and Web Media,2000 â€“ 2002,,Mr,,www.jenraygenre.com,BombSnow,bombsnow.com,,"Art Direction, Graphic Design, Photography, We...",Art Direction,72,Graphic Design,69,Photography,46,Web Design,37,Social Media Marketing,33,Advertising,31,https://www.linkedin.com/in/todd-heath-351a1611,todd-heath-351a1611,2022-01-03T20:29:37.482Z,"Bozeman, Montana",,,NaT,,,,,,,,,,,,


A quick look confirms the impression of the outreach being targeted at business leaders, superficially there is little to no difference between the profiles. Next we'll pick a few characteristicts to compare won, churned and lost members/attendees to see if we can detect any patterns.

In [102]:
good_combined = pd.concat([won,churned])

In [103]:
dfs = (('won',won), ('churned',churned), ('combined positive', good_combined), ('lost',lost)) #for mass processing

## Detecting patterns

In [104]:
for frame in dfs:
    print(f'Examining {frame[0]} leads.')
    print(f'Mean subscribers: {frame[1].subscribers.mean()}, median subscribers: {frame[1].subscribers.median()}')
    print(f'Mean user id: {frame[1].userId.mean()}')
    print('='*100)

Examining won leads.
Mean subscribers: 2504.7, median subscribers: 1700.0
Mean user id: 99719454.2
Examining churned leads.
Mean subscribers: 2621.121794871795, median subscribers: 1470.5
Mean user id: 114916101.24489796
Examining combined positive leads.
Mean subscribers: 2614.10843373494, median subscribers: 1525.0
Mean user id: 113509004.2962963
Examining lost leads.
Mean subscribers: 2628.1215469613257, median subscribers: 1175.0
Mean user id: 152985565.2872928


We have little numerical data - user ids are barely that, although they might suggest that more seasoned LinkedIn users (= smaller id) tend to be more receptive to the events and membership.  
While mean number of subscribers is almost identical across the three groups, medians vary significantly - with larger numbers indicating a propensity towards membership or at least attending an event.  
Next we'll quantify what we can and mine for possible categorical data.

In [105]:
for frame in dfs:   
    frame[1]['desc_len'] = frame[1].description.apply(lambda x: 0 if x is np.nan else len(x))
    frame[1]['headline_len'] = frame[1].headline.apply(lambda x: 0 if x is np.nan else len(x))
    print(f'Examining {frame[0]} leads.')
    print(f'Mean length of headline: {frame[1].headline_len.mean()}, median length of headline: {frame[1].headline_len.median()}')
    print(f'Mean length of description: {frame[1].desc_len.mean()}, median length of description: {frame[1].desc_len.median()}')

Examining won leads.
Mean length of headline: 62.0, median length of headline: 50.0
Mean length of description: 570.7, median length of description: 398.5
Examining churned leads.
Mean length of headline: 65.59235668789809, median length of headline: 49.0
Mean length of description: 756.6751592356688, median length of description: 432.0
Examining combined positive leads.
Mean length of headline: 65.37724550898204, median length of headline: 49.0
Mean length of description: 745.5389221556886, median length of description: 432.0
Examining lost leads.
Mean length of headline: 59.005524861878456, median length of headline: 50.0
Mean length of description: 684.5911602209944, median length of description: 440.0


The length of sections themselves shows little variabiltiy, although it's curious that won and previously won leads show a slight tendency to write longer headlines and shorter descriptions. Next we'll look at contents of descriptions and headlines to see if there's any difference in branding oneself that correlates with purchasing membership or an event ticket.

In [106]:
def inspect_titles(text):
    titles = (('owner','owner'),
        ('founder|co-founder','founder'),
        ('ceo| chief executive','ceo'),
        ('c[a-z]{2}| chief .+ officer','c-level'),
        ('entrepren','entrepreneur'),
        ('managing [a-z]+| director| s*vp| head of ','higher_management'))
    
    title_list = []
    for pair in titles:
        if text is not np.nan:
            if re.search(pair[0], text.lower()):
                title_list.append(pair[1])
    return title_list

In [107]:
for frame in dfs:
    frame[1]['title_list_desc']= frame[1].description.apply(inspect_titles)
    frame[1]['title_list_headline']= frame[1].headline.apply(inspect_titles)
    frame[1]['desc_titles'] = frame[1].title_list_desc.apply(lambda x: len(x))
    frame[1]['headline_titles'] = frame[1].title_list_headline.apply(lambda x: len(x))
    print(f'Examining {frame[0]} leads.')
    print(f'Mean number of titles in headline: {frame[1].headline_titles.mean()}, median number of titles in headline: {frame[1].headline_titles.median()}')
    print(f'Mean number of titles in description: {frame[1].desc_titles.mean()}, median number of titles in description: {frame[1].desc_titles.median()}')

Examining won leads.
Mean number of titles in headline: 1.7, median number of titles in headline: 1.5
Mean number of titles in description: 1.1, median number of titles in description: 1.0
Examining churned leads.
Mean number of titles in headline: 1.5987261146496816, median number of titles in headline: 1.0
Mean number of titles in description: 1.5286624203821657, median number of titles in description: 1.0
Examining combined positive leads.
Mean number of titles in headline: 1.6047904191616766, median number of titles in headline: 1.0
Mean number of titles in description: 1.5029940119760479, median number of titles in description: 1.0
Examining lost leads.
Mean number of titles in headline: 1.4364640883977902, median number of titles in headline: 1.0
Mean number of titles in description: 1.4751381215469612, median number of titles in description: 1.0


In [108]:
for col in df.columns:
    for frame in dfs:
        print(f'Leads: {frame[0]}, missing values in {col}: {round(((frame[1][col].isna().sum())/frame[1].shape[0]),2)*100}%')
    print('='*100)

Leads: won, missing values in linkedinProfile: 0.0%
Leads: churned, missing values in linkedinProfile: 0.0%
Leads: combined positive, missing values in linkedinProfile: 0.0%
Leads: lost, missing values in linkedinProfile: 0.0%
Leads: won, missing values in Result: 0.0%
Leads: churned, missing values in Result: 0.0%
Leads: combined positive, missing values in Result: 0.0%
Leads: lost, missing values in Result: 0.0%
Leads: won, missing values in Other: 0.0%
Leads: churned, missing values in Other: 100.0%
Leads: combined positive, missing values in Other: 94.0%
Leads: lost, missing values in Other: 0.0%
Leads: won, missing values in email: 30.0%
Leads: churned, missing values in email: 36.0%
Leads: combined positive, missing values in email: 35.0%
Leads: lost, missing values in email: 40.0%
Leads: won, missing values in description: 30.0%
Leads: churned, missing values in description: 25.0%
Leads: combined positive, missing values in description: 26.0%
Leads: lost, missing values in descr

Doesn't look like gaps in any of the aspects of profiles can tell us anything meaningful about the difference between the groups.

In [109]:
for frame in dfs:
    frame[1]['jobDateRange2'] = frame[1]['jobDateRange2'].fillna(' ')
    frame[1]['two_jobs'] = frame[1].jobDateRange2.apply(lambda x: True if 'Present' in str(x) else False)
    print(f'Examining {frame[0]} leads.')
    print(f'Share of people combining jobs: {round((frame[1].two_jobs.sum()/frame[1].shape[0]),2)}')

Examining won leads.
Share of people combining jobs: 0.9
Examining churned leads.
Share of people combining jobs: 0.6
Examining combined positive leads.
Share of people combining jobs: 0.62
Examining lost leads.
Share of people combining jobs: 0.7


It appears that having two jobs at once used to somewhat reduce the chance of becoming a member; however it's not yet clear how significant this correlation is.

## Statistical significance

A couple of differences we were able to spot warrant running test of statistical significance on. We will be comparing two groups of profiles: those who purchased membership or a ticket event + those who were members in the past, and those who failed to do either of those after the last outreach.  
We'll begin with the number of subscribers - we suspect that people cultivating a larger number of followers on LinkedIn are more likely to become members and attend events as the prime networking and self-promoting opportunities.  
We'll start with a formal check of normality of distribution using the Shapiro-Wilk test, H0 being that the data is distributed normally, and H1 being that the distribution is not normal.

In [113]:
alpha = 0.01
dfs_2 = (('combined positive', good_combined), ('lost',lost))

In [120]:
for frame in dfs_2:
    print(f'Checking distribution in {frame[0]}:')
    p_val = st.shapiro(frame[1].subscribers.dropna()).pvalue
    if  p_val > alpha:
        print(f'P-value: {p_val}. Can not reject the null hypothesis, the number of subscribers is distributed normally.')
    else:
        print(f'P-value: {p_val}. The null hypothesis should be rejected, the number of subscribers is distributed not normally.')

Checking distribution in combined positive:
P-value: 1.1725375345625936e-17. The null hypothesis should be rejected, the number of subscribers is distributed not normally.
Checking distribution in lost:
P-value: 1.0155986513698732e-22. The null hypothesis should be rejected, the number of subscribers is distributed not normally.


As the data is not distributed normally we will proceed with using the The Mann-Whitney U test to check whether the observed difference is significant.  
H0: the average number of subscribers in both groups is the same.  
H1: the average number of subscribers in the 'good_combined' group is higher.

In [123]:
x = good_combined.subscribers.dropna()
y = lost.subscribers.dropna()
p_val = st.mannwhitneyu(x,y, alternative='greater').pvalue
if  p_val < alpha:
    print(f'P-value: {p_val}. We reject the null hypothesis, the average number of subsribers in "good_combined" is higher.')
else:
    print(f'P-value: {p_val}. We can not reject the null hypothesis, there is no significant difference between the numbers of subscribers in two groups.')

P-value: 0.09707386827171183. We can not reject the null hypothesis, there is no significant difference between the numbers of subscribers in two groups.
