In [7]:
from sklearn.utils import shuffle
from pandas import DataFrame, Series

In [10]:
df = DataFrame({'A':[1,2,3]})

s = Series([2,0,1],index=[2,0, 1])
s

2    2
0    0
1    1
dtype: int64

In [27]:
df.sample(n=3)

Unnamed: 0,B,A
0,0,1
2,2,3
1,1,2


In [15]:
df.insert(0, 'B', s)

ValueError: cannot insert B, already exists

In [16]:
df

Unnamed: 0,B,A
0,0,1
1,1,2
2,2,3


In [None]:
dates = shuffle(dates)

In [3]:
set([1, 2]) == set([2,1])

True

In [529]:
from pandas import DataFrame

In [532]:
df = DataFrame({'x':['a','a','a','b','b','b'],  'y':[0,1,0,1,1,0]})
df

Unnamed: 0,x,y
0,a,0
1,a,1
2,a,0
3,b,1
4,b,1
5,b,0


In [538]:
d = df.groupby('x')['y'].agg([len, sum]).rename(columns={'len':'sample_n', 'sum':'positive_n'})
d

Unnamed: 0_level_0,sample_n,positive_n
x,Unnamed: 1_level_1,Unnamed: 2_level_1
a,3,1
b,3,2


In [539]:
d['negative_n'] = d['sample_n'] - d['positive_n']
d

Unnamed: 0_level_0,sample_n,positive_n,negative_n
x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,3,1,2
b,3,2,1


In [518]:
class Node:
    def __init__(self, name, parent=None, depth_prefix=''):
        self.name = name
        self.depth_prefix = depth_prefix

        self.children = []
        self.dependencies = []
        
        self.parent = parent
    
        if parent:
            self.depth = parent.depth + 1
            self.parent.children.append(self)            
        else:
            self.depth = 1
            
    def add_dependency(self, dependency):
        if self.children:
            for child in self.children:
                child.add_dependency(dependency)                
        else:
            self.dependencies.append(dependency)

    def add_dependencies(self, dependencies):
        for dependency in dependencies:
            self.add_dependency(dependency)

            
    def to_dict(self):
        d = {self.depth_prefix + '_level_' + str(self.depth): self.name}
        if self.parent:
            d.update(self.parent.to_dict())
            
        return d
    
    def to_dict_with_level(self):
        d = self.to_dict()
        if self.parent:
            d.update(self.parent.to_dict())
        return d
    
    def to_dict_with_dependency(self, recursive=False):
        data = []
        d = self.to_dict_with_level()
        
        if self.dependencies:
            for dependency in self.dependencies:
                if recursive:
                    d_list = dependency.to_dict_with_dependency()
                else:
                    d_list = [dependency.to_dict_with_level()]
                    
                for d_ in d_list:
                    full_d = d.copy()
                    full_d.update(d_)
                    data.append(full_d)
        else:
            data.append(self.to_dict_with_level())

        return data
        
    
#     def to_data(self, include_dependency=True):
#         data = []
#         d = self.to_dict()
        
#         if self.children:
#             for child in self.children:
#                 for child_dict in child.to_data():
#                     d = d.copy()
#                     d.update(child_dict)
#                     data.append(d)
#         else:
#             data = [d]
            
        return data
        

class SkillPoint(Node):
    def __init__(self, skill_id, name, parent=None):
        super().__init__(name, parent, depth_prefix='skill_point')
    
    
class Task(Node):
    def __init__(self, task_id, name, parent=None):
        super().__init__(name, parent, depth_prefix='task')

        
class Resource(Node):
    def __init__(self, resource_id, name, url=None, parent=None):
        super().__init__(name, parent, depth_prefix='resource')
    

class SkillBuilder:
    def __init__(self):
        self.skill_id = 0
        
    def build_skill(self, *args, **kwargs):
        skill_point = SkillPoint(self.skill_id, *args, **kwargs)
        self.skill_id += 1
        return skill_point
        

class TaskBuilder:
    def __init__(self):
        self.task_id = 0
        self.tasks = []
        
    def build_task(self, *args, **kwargs):
        task = Task(self.task_id, *args, **kwargs)
        self.task_id += 1
        
        self.tasks.append(task)
        
        return task
    
    def get_end_tasks(self):
        return [task for task in self.tasks if len(task.children) == 0]


class ResourceBuilder:
    def __init__(self):
        self.resource_id = 0
    
    def build_resource(self, *args, **kwargs):
        resource = Resource(self.resource_id, *args, **kwargs)
        self.resource_id += 1
        return resource

In [519]:
skill_builder = SkillBuilder()
task_builder = TaskBuilder()
resource_builder = ResourceBuilder()

In [520]:
python_for_data_anlysis_resource = resource_builder.build_resource('python_for_data_analysis')
pandas_documentation_resource = resource_builder.build_resource('pandas documentation', url='http://pandas.pydata.org/pandas-docs/stable/')
sklearn_documentation_resource = resource_builder.build_resource('sklearn documentation', url='http://scikit-learn.org/stable/documentation.html')
sqlalchemy_core_documentation_resource = resource_builder.build_resource('sqlalchemy_core_documentation', url='https://docs.sqlalchemy.org/en/latest/core/tutorial.html')
sqlalchemy_orm_documentation_resource = resource_builder.build_resource('sqlalchemy_orm_documentation', url='https://docs.sqlalchemy.org/en/latest/orm/tutorial.html')
flask_documentation_resource = resource_builder.build_resource('flask_documentation', url='http://flask.pocoo.org/docs/1.0/')
git_tutorial_resource = resource_builder.build_resource('廖雪峰git教程', url='https://www.liaoxuefeng.com/wiki/0013739516305929606dd18361248578c67b8067c8c017b000') 

In [521]:
python_skill = skill_builder.build_skill('python')

pandas_skill = skill_builder.build_skill('pandas', parent=python_skill)
pandas_skill.add_dependency(python_for_data_anlysis_resource)

logging_skill = skill_builder.build_skill('logging', parent=python_skill)

sqlalchemy_core_skill = skill_builder.build_skill('sqlalchemy_core', parent=python_skill)

re_skill = skill_builder.build_skill('re', parent=python_skill)

sklearn_skill = skill_builder.build_skill('sklearn', parent=python_skill)
xgboost_skill = skill_builder.build_skill('xgboost', parent=python_skill)
flask_skill = skill_builder.build_skill('flask', parent=python_skill)


beautiful_soup_skill = skill_builder.build_skill('beatiful_soup', parent=python_skill)
requests_skill = skill_builder.build_skill('requests', parent=python_skill)

regular_expression_skill = skill_builder.build_skill('regular_expression')

git_skill = skill_builder.build_skill('git')
git_skill.add_dependency(git_tutorial_resource)

docker_skill = skill_builder.build_skill('docker')



bash_skill = skill_builder.build_skill('bash')

In [522]:
data_acquisition_task = task_builder.build_task('data_acquisition')
database_data_acquisition_task = task_builder.build_task('database_data_acquisition', data_acquisition_task)
web_data_acquisition_task = task_builder.build_task('web_data_acquisition', data_acquisition_task)


data_manipulation_task = task_builder.build_task('data_manipulation')
data_cleaning_task = task_builder.build_task('data_cleaning', data_manipulation_task)
data_engineering_task = task_builder.build_task('data_engineering', data_manipulation_task)
descriptive_analysis_task = task_builder.build_task('descriptive_analysis', data_manipulation_task)
predictive_modeling_task = task_builder.build_task('predictive_modeling', data_manipulation_task)

code_managament_task = task_builder.build_task('code_management')

debug_task = task_builder.build_task('debug')

service_task = task_builder.build_task('service_providing')

deployment_task = task_builder.build_task('deployment')

data_manipulation_task.add_dependencies([pandas_skill, sqlalchemy_core_skill, re_skill])
predictive_modeling_task.add_dependencies([sklearn_skill, xgboost_skill])

code_managament_task.add_dependencies([git_skill])
deployment_task.add_dependencies([bash_skill, docker_skill])

service_task.add_dependencies([flask_skill])



In [526]:
data= []
for task in task_builder.get_end_tasks():
    data += task.to_dict_with_dependency(recursive=False)

In [528]:
DataFrame.from_dict(data)

Unnamed: 0,skill_point_level_1,skill_point_level_2,task_level_1,task_level_2
0,,,data_acquisition,database_data_acquisition
1,,,data_acquisition,web_data_acquisition
2,python,pandas,data_manipulation,data_cleaning
3,python,sqlalchemy_core,data_manipulation,data_cleaning
4,python,re,data_manipulation,data_cleaning
5,python,pandas,data_manipulation,data_engineering
6,python,sqlalchemy_core,data_manipulation,data_engineering
7,python,re,data_manipulation,data_engineering
8,python,pandas,data_manipulation,descriptive_analysis
9,python,sqlalchemy_core,data_manipulation,descriptive_analysis


In [527]:
data

[{'task_level_2': 'database_data_acquisition',
  'task_level_1': 'data_acquisition'},
 {'task_level_2': 'web_data_acquisition', 'task_level_1': 'data_acquisition'},
 {'task_level_2': 'data_cleaning',
  'task_level_1': 'data_manipulation',
  'skill_point_level_2': 'pandas',
  'skill_point_level_1': 'python'},
 {'task_level_2': 'data_cleaning',
  'task_level_1': 'data_manipulation',
  'skill_point_level_2': 'sqlalchemy_core',
  'skill_point_level_1': 'python'},
 {'task_level_2': 'data_cleaning',
  'task_level_1': 'data_manipulation',
  'skill_point_level_2': 're',
  'skill_point_level_1': 'python'},
 {'task_level_2': 'data_engineering',
  'task_level_1': 'data_manipulation',
  'skill_point_level_2': 'pandas',
  'skill_point_level_1': 'python'},
 {'task_level_2': 'data_engineering',
  'task_level_1': 'data_manipulation',
  'skill_point_level_2': 'sqlalchemy_core',
  'skill_point_level_1': 'python'},
 {'task_level_2': 'data_engineering',
  'task_level_1': 'data_manipulation',
  'skill_poin